From 3f6ca897e3903301ea7f11814111a1b5bc0fb611 Mon Sep 17 00:00:00 2001 From: anilb Date: Tue, 5 Nov 2024 15:11:05 +0100 Subject: [PATCH] linting and cleaning --- .../progai-linkedin-scraper/service.ts | 108 +++++++++++------- .../sources/progai-linkedin-scraper/types.ts | 8 ++ .../src/sources/progai/service.ts | 61 ++++------ .../members_enrichment_worker/src/types.ts | 4 +- .../members_enrichment_worker/index.ts | 2 +- 5 files changed, 103 insertions(+), 80 deletions(-) diff --git a/services/apps/premium/members_enrichment_worker/src/sources/progai-linkedin-scraper/service.ts b/services/apps/premium/members_enrichment_worker/src/sources/progai-linkedin-scraper/service.ts index fb54c8b6d7..3b6fcb0885 100644 --- a/services/apps/premium/members_enrichment_worker/src/sources/progai-linkedin-scraper/service.ts +++ b/services/apps/premium/members_enrichment_worker/src/sources/progai-linkedin-scraper/service.ts @@ -12,6 +12,8 @@ import { } from '../../types' import { IMemberEnrichmentDataProgAI, IMemberEnrichmentDataProgAIResponse } from '../progai/types' +import { IMemberEnrichmentDataProgAILinkedinScraper } from './types' + export default class EnrichmentServiceProgAILinkedinScraper extends LoggerBase implements IEnrichmentService @@ -26,16 +28,13 @@ export default class EnrichmentServiceProgAILinkedinScraper public enrichableBySql = `(mi.verified AND mi.type = 'username' and mi.platform = 'linkedin')` - // bust cache after 120 days - public cacheObsoleteAfterSeconds = 60 * 60 * 24 * 120 + public cacheObsoleteAfterSeconds = 60 * 60 * 24 * 90 constructor(public readonly log: Logger) { super(log) } - // in addition to members with linkedin identity - // we'll also use existing cache rows from other sources (serp and clearbit) - // if there are linkedin urls there as well, we'll enrich using these also + // in addition to members with linkedin identity we'll also use existing cache rows from other sources (serp and clearbit) async isEnrichableBySource(input: IEnrichmentSourceInput): Promise { const caches = await findMemberEnrichmentCacheForAllSources(input.memberId) @@ -68,10 +67,15 @@ export default class EnrichmentServiceProgAILinkedinScraper private async findConsumableLinkedinIdentities( input: IEnrichmentSourceInput, - ): Promise { - const consumableIdentities: IMemberIdentity[] = [] + ): Promise< + (IMemberIdentity & { repeatedTimesInDifferentSources: number; isFromVerifiedSource: boolean })[] + > { + const consumableIdentities: (IMemberIdentity & { + repeatedTimesInDifferentSources: number + isFromVerifiedSource: boolean + })[] = [] const caches = await findMemberEnrichmentCacheForAllSources(input.memberId) - const linkedinUrlHashmap = new Map() + const linkedinUrlHashmap = new Map() for (const cache of caches) { if (this.alsoFindInputsInSourceCaches.includes(cache.source)) { @@ -83,34 +87,58 @@ export default class EnrichmentServiceProgAILinkedinScraper if (normalized.identities.some((i) => i.platform === PlatformType.LINKEDIN)) { const identity = normalized.identities.find((i) => i.platform === PlatformType.LINKEDIN) if (!linkedinUrlHashmap.get(identity.value)) { - consumableIdentities.push(identity) - linkedinUrlHashmap.set(identity.value, true) + consumableIdentities.push({ + ...identity, + repeatedTimesInDifferentSources: 1, + isFromVerifiedSource: false, + }) + linkedinUrlHashmap.set(identity.value, 1) + } else { + const repeatedTimesInDifferentSources = linkedinUrlHashmap.get(identity.value) + 1 + linkedinUrlHashmap.set(identity.value, repeatedTimesInDifferentSources) + consumableIdentities.find( + (i) => i.value === identity.value, + ).repeatedTimesInDifferentSources = repeatedTimesInDifferentSources } } } } // also add the linkedin identity from the input - if ( - input.linkedin && - input.linkedin.value && - input.linkedin.verified && - !linkedinUrlHashmap.get(input.linkedin.value) - ) { - consumableIdentities.push(input.linkedin) + if (input.linkedin && input.linkedin.value && input.linkedin.verified) { + if (!linkedinUrlHashmap.get(input.linkedin.value)) { + consumableIdentities.push({ + ...input.linkedin, + repeatedTimesInDifferentSources: 1, + isFromVerifiedSource: true, + }) + } else { + const repeatedTimesInDifferentSources = linkedinUrlHashmap.get(input.linkedin.value) + 1 + const identityFound = consumableIdentities.find((i) => i.value === input.linkedin.value) + + identityFound.repeatedTimesInDifferentSources = repeatedTimesInDifferentSources + identityFound.isFromVerifiedSource = true + } } - return consumableIdentities } - async getData(input: IEnrichmentSourceInput): Promise { - const profiles: IMemberEnrichmentDataProgAI[] = [] + async getData( + input: IEnrichmentSourceInput, + ): Promise { + const profiles: IMemberEnrichmentDataProgAILinkedinScraper[] = [] const consumableIdentities = await this.findConsumableLinkedinIdentities(input) for (const identity of consumableIdentities) { const data = await this.getDataUsingLinkedinHandle(identity.value) if (data) { - profiles.push(data) + profiles.push({ + ...data, + metadata: { + repeatedTimesInDifferentSources: identity.repeatedTimesInDifferentSources, + isFromVerifiedSource: identity.isFromVerifiedSource, + }, + }) } } @@ -118,30 +146,26 @@ export default class EnrichmentServiceProgAILinkedinScraper } private async getDataUsingLinkedinHandle(handle: string): Promise { - let response: IMemberEnrichmentDataProgAIResponse - - try { - const url = `${process.env['CROWD_ENRICHMENT_PROGAI_URL']}/get_profile` - const config = { - method: 'get', - url, - params: { - linkedin_url: `https://linkedin.com/in/${handle}`, - with_emails: true, - api_key: process.env['CROWD_ENRICHMENT_PROGAI_API_KEY'], - }, - headers: {}, - } - - response = (await axios(config)).data - } catch (err) { - throw new Error(err) + const url = `${process.env['CROWD_ENRICHMENT_PROGAI_URL']}/get_profile` + const config = { + method: 'get', + url, + params: { + linkedin_url: `https://linkedin.com/in/${handle}`, + with_emails: true, + api_key: process.env['CROWD_ENRICHMENT_PROGAI_API_KEY'], + }, + headers: {}, } - return response.profile + const response: IMemberEnrichmentDataProgAIResponse = (await axios(config)).data + + return response?.profile || null } - normalize(profiles: IMemberEnrichmentDataProgAI[]): IMemberEnrichmentDataNormalized[] { + normalize( + profiles: IMemberEnrichmentDataProgAILinkedinScraper[], + ): IMemberEnrichmentDataNormalized[] { const normalizedProfiles: IMemberEnrichmentDataNormalized[] = [] const progaiService = EnrichmentSourceServiceFactory.getEnrichmentSourceService( MemberEnrichmentSource.PROGAI, @@ -150,7 +174,7 @@ export default class EnrichmentServiceProgAILinkedinScraper for (const profile of profiles) { const normalized = progaiService.normalize(profile) as IMemberEnrichmentDataNormalized - normalizedProfiles.push(normalized) + normalizedProfiles.push({ ...normalized, metadata: profile.metadata }) } return normalizedProfiles.length > 0 ? normalizedProfiles : null diff --git a/services/apps/premium/members_enrichment_worker/src/sources/progai-linkedin-scraper/types.ts b/services/apps/premium/members_enrichment_worker/src/sources/progai-linkedin-scraper/types.ts index e69de29bb2..af7bda13c7 100644 --- a/services/apps/premium/members_enrichment_worker/src/sources/progai-linkedin-scraper/types.ts +++ b/services/apps/premium/members_enrichment_worker/src/sources/progai-linkedin-scraper/types.ts @@ -0,0 +1,8 @@ +import { IMemberEnrichmentDataProgAI } from '../progai/types' + +export interface IMemberEnrichmentDataProgAILinkedinScraper extends IMemberEnrichmentDataProgAI { + metadata: { + repeatedTimesInDifferentSources: number + isFromVerifiedSource: boolean + } +} diff --git a/services/apps/premium/members_enrichment_worker/src/sources/progai/service.ts b/services/apps/premium/members_enrichment_worker/src/sources/progai/service.ts index 1f2859ba0f..5b6dc736f4 100644 --- a/services/apps/premium/members_enrichment_worker/src/sources/progai/service.ts +++ b/services/apps/premium/members_enrichment_worker/src/sources/progai/service.ts @@ -309,47 +309,36 @@ export default class EnrichmentServiceProgAI extends LoggerBase implements IEnri } async getDataUsingGitHubHandle(githubUsername: string): Promise { - let response: IMemberEnrichmentDataProgAIResponse - - try { - const url = `${process.env['CROWD_ENRICHMENT_PROGAI_URL']}/get_profile` - const config = { - method: 'get', - url, - params: { - github_handle: githubUsername, - with_emails: true, - api_key: process.env['CROWD_ENRICHMENT_PROGAI_API_KEY'], - }, - headers: {}, - } - - response = (await axios(config)).data - } catch (err) { - throw new Error(err) + const url = `${process.env['CROWD_ENRICHMENT_PROGAI_URL']}/get_profile` + const config = { + method: 'get', + url, + params: { + github_handle: githubUsername, + with_emails: true, + api_key: process.env['CROWD_ENRICHMENT_PROGAI_API_KEY'], + }, + headers: {}, } - return response.profile + const response: IMemberEnrichmentDataProgAIResponse = (await axios(config)).data + return response?.profile || null } async getDataUsingEmailAddress(email: string): Promise { - try { - const url = `${process.env['CROWD_ENRICHMENT_PROGAI_URL']}/get_profile` - const config = { - method: 'get', - url, - params: { - email, - with_emails: true, - api_key: process.env['CROWD_ENRICHMENT_PROGAI_API_KEY'], - }, - headers: {}, - } - - const response = (await axios(config)).data - return response.profile - } catch (err) { - throw new Error(err) + const url = `${process.env['CROWD_ENRICHMENT_PROGAI_URL']}/get_profile` + const config = { + method: 'get', + url, + params: { + email, + with_emails: true, + api_key: process.env['CROWD_ENRICHMENT_PROGAI_API_KEY'], + }, + headers: {}, } + + const response = (await axios(config)).data + return response?.profile || null } } diff --git a/services/apps/premium/members_enrichment_worker/src/types.ts b/services/apps/premium/members_enrichment_worker/src/types.ts index b99f486124..4229dea19e 100644 --- a/services/apps/premium/members_enrichment_worker/src/types.ts +++ b/services/apps/premium/members_enrichment_worker/src/types.ts @@ -10,6 +10,7 @@ import { } from '@crowd/types' import { IMemberEnrichmentDataClearbit } from './sources/clearbit/types' +import { IMemberEnrichmentDataProgAILinkedinScraper } from './sources/progai-linkedin-scraper/types' import { IMemberEnrichmentDataProgAI } from './sources/progai/types' import { IMemberEnrichmentDataSerp } from './sources/serp/types' @@ -26,9 +27,9 @@ export interface IEnrichmentSourceInput { export type IMemberEnrichmentData = | IMemberEnrichmentDataProgAI - | IMemberEnrichmentDataProgAI[] | IMemberEnrichmentDataClearbit | IMemberEnrichmentDataSerp + | IMemberEnrichmentDataProgAILinkedinScraper[] export interface IEnrichmentService { source: MemberEnrichmentSource @@ -62,6 +63,7 @@ export interface IMemberEnrichmentDataNormalized { attributes?: IAttributes memberOrganizations?: IMemberEnrichmentDataNormalizedOrganization[] displayName?: string + metadata?: Record } export interface IMemberEnrichmentDataNormalizedOrganization { diff --git a/services/libs/data-access-layer/src/old/apps/premium/members_enrichment_worker/index.ts b/services/libs/data-access-layer/src/old/apps/premium/members_enrichment_worker/index.ts index 2ab8a9d3d2..ec0df7b091 100644 --- a/services/libs/data-access-layer/src/old/apps/premium/members_enrichment_worker/index.ts +++ b/services/libs/data-access-layer/src/old/apps/premium/members_enrichment_worker/index.ts @@ -546,5 +546,5 @@ export async function findMemberEnrichmentCacheForAllSourcesDb( { memberId }, ) - return result ?? null + return result ?? [] }