Skip to content

Commit

Permalink
linting and cleaning
Browse files Browse the repository at this point in the history
  • Loading branch information
epipav committed Nov 5, 2024
1 parent be18b12 commit 3f6ca89
Show file tree
Hide file tree
Showing 5 changed files with 103 additions and 80 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ import {
} from '../../types'
import { IMemberEnrichmentDataProgAI, IMemberEnrichmentDataProgAIResponse } from '../progai/types'

import { IMemberEnrichmentDataProgAILinkedinScraper } from './types'

export default class EnrichmentServiceProgAILinkedinScraper
extends LoggerBase
implements IEnrichmentService
Expand All @@ -26,16 +28,13 @@ export default class EnrichmentServiceProgAILinkedinScraper

public enrichableBySql = `(mi.verified AND mi.type = 'username' and mi.platform = 'linkedin')`

// bust cache after 90 days
public cacheObsoleteAfterSeconds = 60 * 60 * 24 * 120
public cacheObsoleteAfterSeconds = 60 * 60 * 24 * 90

constructor(public readonly log: Logger) {
super(log)
}

// in addition to members with linkedin identity
// we'll also use existing cache rows from other sources (serp and clearbit)
// if there are linkedin urls there as well, we'll enrich using these also
// in addition to members with linkedin identity we'll also use existing cache rows from other sources (serp and clearbit)
async isEnrichableBySource(input: IEnrichmentSourceInput): Promise<boolean> {
const caches = await findMemberEnrichmentCacheForAllSources(input.memberId)

Expand Down Expand Up @@ -68,10 +67,15 @@ export default class EnrichmentServiceProgAILinkedinScraper

private async findConsumableLinkedinIdentities(
input: IEnrichmentSourceInput,
): Promise<IMemberIdentity[]> {
const consumableIdentities: IMemberIdentity[] = []
): Promise<
(IMemberIdentity & { repeatedTimesInDifferentSources: number; isFromVerifiedSource: boolean })[]
> {
const consumableIdentities: (IMemberIdentity & {
repeatedTimesInDifferentSources: number
isFromVerifiedSource: boolean
})[] = []
const caches = await findMemberEnrichmentCacheForAllSources(input.memberId)
const linkedinUrlHashmap = new Map<string, boolean>()
const linkedinUrlHashmap = new Map<string, number>()

for (const cache of caches) {
if (this.alsoFindInputsInSourceCaches.includes(cache.source)) {
Expand All @@ -83,65 +87,85 @@ export default class EnrichmentServiceProgAILinkedinScraper
if (normalized.identities.some((i) => i.platform === PlatformType.LINKEDIN)) {
const identity = normalized.identities.find((i) => i.platform === PlatformType.LINKEDIN)
if (!linkedinUrlHashmap.get(identity.value)) {
consumableIdentities.push(identity)
linkedinUrlHashmap.set(identity.value, true)
consumableIdentities.push({
...identity,
repeatedTimesInDifferentSources: 1,
isFromVerifiedSource: false,
})
linkedinUrlHashmap.set(identity.value, 1)
} else {
const repeatedTimesInDifferentSources = linkedinUrlHashmap.get(identity.value) + 1
linkedinUrlHashmap.set(identity.value, repeatedTimesInDifferentSources)
consumableIdentities.find(
(i) => i.value === identity.value,
).repeatedTimesInDifferentSources = repeatedTimesInDifferentSources
}
}
}
}

// also add the linkedin identity from the input
if (
input.linkedin &&
input.linkedin.value &&
input.linkedin.verified &&
!linkedinUrlHashmap.get(input.linkedin.value)
) {
consumableIdentities.push(input.linkedin)
if (input.linkedin && input.linkedin.value && input.linkedin.verified) {
if (!linkedinUrlHashmap.get(input.linkedin.value)) {
consumableIdentities.push({
...input.linkedin,
repeatedTimesInDifferentSources: 1,
isFromVerifiedSource: true,
})
} else {
const repeatedTimesInDifferentSources = linkedinUrlHashmap.get(input.linkedin.value) + 1
const identityFound = consumableIdentities.find((i) => i.value === input.linkedin.value)

identityFound.repeatedTimesInDifferentSources = repeatedTimesInDifferentSources
identityFound.isFromVerifiedSource = true
}
}

return consumableIdentities
}

async getData(input: IEnrichmentSourceInput): Promise<IMemberEnrichmentDataProgAI[] | null> {
const profiles: IMemberEnrichmentDataProgAI[] = []
async getData(
input: IEnrichmentSourceInput,
): Promise<IMemberEnrichmentDataProgAILinkedinScraper[] | null> {
const profiles: IMemberEnrichmentDataProgAILinkedinScraper[] = []
const consumableIdentities = await this.findConsumableLinkedinIdentities(input)

for (const identity of consumableIdentities) {
const data = await this.getDataUsingLinkedinHandle(identity.value)
if (data) {
profiles.push(data)
profiles.push({
...data,
metadata: {
repeatedTimesInDifferentSources: identity.repeatedTimesInDifferentSources,
isFromVerifiedSource: identity.isFromVerifiedSource,
},
})
}
}

return profiles.length > 0 ? profiles : null
}

/**
 * Fetches a raw ProgAI profile for a linkedin handle.
 *
 * @param handle - linkedin username (the `/in/<handle>` path segment)
 * @returns the scraped profile, or null when the response has no profile;
 *          axios/network errors propagate to the caller unchanged
 */
private async getDataUsingLinkedinHandle(
  handle: string,
): Promise<IMemberEnrichmentDataProgAI | null> {
  const url = `${process.env['CROWD_ENRICHMENT_PROGAI_URL']}/get_profile`
  const config = {
    method: 'get',
    url,
    params: {
      linkedin_url: `https://linkedin.com/in/${handle}`,
      with_emails: true,
      api_key: process.env['CROWD_ENRICHMENT_PROGAI_API_KEY'],
    },
    headers: {},
  }

  const response: IMemberEnrichmentDataProgAIResponse = (await axios(config)).data

  // declared return type now admits null, matching this fallback;
  // ?? (not ||) so only a missing profile maps to null
  return response?.profile ?? null
}

normalize(profiles: IMemberEnrichmentDataProgAI[]): IMemberEnrichmentDataNormalized[] {
normalize(
profiles: IMemberEnrichmentDataProgAILinkedinScraper[],
): IMemberEnrichmentDataNormalized[] {
const normalizedProfiles: IMemberEnrichmentDataNormalized[] = []
const progaiService = EnrichmentSourceServiceFactory.getEnrichmentSourceService(
MemberEnrichmentSource.PROGAI,
Expand All @@ -150,7 +174,7 @@ export default class EnrichmentServiceProgAILinkedinScraper

for (const profile of profiles) {
const normalized = progaiService.normalize(profile) as IMemberEnrichmentDataNormalized
normalizedProfiles.push(normalized)
normalizedProfiles.push({ ...normalized, metadata: profile.metadata })
}

return normalizedProfiles.length > 0 ? normalizedProfiles : null
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
import { IMemberEnrichmentDataProgAI } from '../progai/types'

/**
 * A ProgAI profile obtained via the linkedin scraper, extended with
 * provenance metadata about the identity the scrape was based on.
 */
export interface IMemberEnrichmentDataProgAILinkedinScraper extends IMemberEnrichmentDataProgAI {
  metadata: {
    // how many distinct enrichment sources yielded this same linkedin url
    repeatedTimesInDifferentSources: number
    // true when the url came from the member's verified linkedin identity
    isFromVerifiedSource: boolean
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -309,47 +309,36 @@ export default class EnrichmentServiceProgAI extends LoggerBase implements IEnri
}

/**
 * Fetches a raw ProgAI profile for a GitHub username.
 *
 * @param githubUsername - GitHub login to look up
 * @returns the profile, or null when the response has no profile;
 *          axios/network errors propagate to the caller unchanged
 */
async getDataUsingGitHubHandle(
  githubUsername: string,
): Promise<IMemberEnrichmentDataProgAI | null> {
  const url = `${process.env['CROWD_ENRICHMENT_PROGAI_URL']}/get_profile`
  const config = {
    method: 'get',
    url,
    params: {
      github_handle: githubUsername,
      with_emails: true,
      api_key: process.env['CROWD_ENRICHMENT_PROGAI_API_KEY'],
    },
    headers: {},
  }

  const response: IMemberEnrichmentDataProgAIResponse = (await axios(config)).data
  // declared return type now admits null; ?? so only a missing profile maps to null
  return response?.profile ?? null
}

/**
 * Fetches a raw ProgAI profile for an email address.
 *
 * @param email - email address to look up
 * @returns the profile, or null when the response has no profile;
 *          axios/network errors propagate to the caller unchanged
 */
async getDataUsingEmailAddress(email: string): Promise<IMemberEnrichmentDataProgAI | null> {
  const url = `${process.env['CROWD_ENRICHMENT_PROGAI_URL']}/get_profile`
  const config = {
    method: 'get',
    url,
    params: {
      email,
      with_emails: true,
      api_key: process.env['CROWD_ENRICHMENT_PROGAI_API_KEY'],
    },
    headers: {},
  }

  // typed like the sibling handle-based lookups for consistency
  const response: IMemberEnrichmentDataProgAIResponse = (await axios(config)).data
  return response?.profile ?? null
}
}
4 changes: 3 additions & 1 deletion services/apps/premium/members_enrichment_worker/src/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import {
} from '@crowd/types'

import { IMemberEnrichmentDataClearbit } from './sources/clearbit/types'
import { IMemberEnrichmentDataProgAILinkedinScraper } from './sources/progai-linkedin-scraper/types'
import { IMemberEnrichmentDataProgAI } from './sources/progai/types'
import { IMemberEnrichmentDataSerp } from './sources/serp/types'

Expand All @@ -26,9 +27,9 @@ export interface IEnrichmentSourceInput {

/** Union of the raw payload shapes returned by the individual enrichment sources. */
export type IMemberEnrichmentData =
  | IMemberEnrichmentDataClearbit
  | IMemberEnrichmentDataProgAI
  | IMemberEnrichmentDataProgAILinkedinScraper[]
  | IMemberEnrichmentDataSerp

export interface IEnrichmentService {
source: MemberEnrichmentSource
Expand Down Expand Up @@ -62,6 +63,7 @@ export interface IMemberEnrichmentDataNormalized {
attributes?: IAttributes
memberOrganizations?: IMemberEnrichmentDataNormalizedOrganization[]
displayName?: string
metadata?: Record<string, unknown>
}

export interface IMemberEnrichmentDataNormalizedOrganization {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -546,5 +546,5 @@ export async function findMemberEnrichmentCacheForAllSourcesDb<T>(
{ memberId },
)

return result ?? null
return result ?? []
}

0 comments on commit 3f6ca89

Please sign in to comment.