From 03bd7dda258e39d8e2687d1b8833ea60c82cec4f Mon Sep 17 00:00:00 2001 From: Georg Bremer Date: Tue, 18 Jun 2024 15:59:02 +0200 Subject: [PATCH] chore: Reduce language detection threshold for MeetingTemplates (#9855) --- .../embedder/__tests__/inferLanguage.test.ts | 41 ++++++++++++++++ packages/embedder/indexing/meetingTemplate.ts | 47 ++++++++++--------- packages/embedder/jest.config.js | 21 +++++++++ packages/embedder/package.json | 5 ++ 4 files changed, 93 insertions(+), 21 deletions(-) create mode 100644 packages/embedder/__tests__/inferLanguage.test.ts create mode 100644 packages/embedder/jest.config.js diff --git a/packages/embedder/__tests__/inferLanguage.test.ts b/packages/embedder/__tests__/inferLanguage.test.ts new file mode 100644 index 00000000000..031c560b546 --- /dev/null +++ b/packages/embedder/__tests__/inferLanguage.test.ts @@ -0,0 +1,41 @@ +import {inferLanguage} from '../inferLanguage' + +test('should infer the language of a string', () => { + expect( + inferLanguage( + ` +Weighted Shortest Job First +planning poker, sprint poker, estimation +Story Points + +Fibonacci +1, 2, 3, 5, 8, 13, 21, 34, ?, Pass +Story Value + +Fibonacci +1, 2, 3, 5, 8, 13, 21, 34, ?, Pass + `, + 10 + ) + ).toBe('en') + /* ideally this would be detected as 'en', but it isn't + expect( + inferLanguage( + ` +*New Template #1 +New prompt + `, + 10 + ) + ).toBe('en') + */ + expect( + inferLanguage( + ` +Was lief gut? +Höhepunkte + `, + 10 + ) + ).not.toBe('en') +}) diff --git a/packages/embedder/indexing/meetingTemplate.ts b/packages/embedder/indexing/meetingTemplate.ts index b41796c1b13..63073f3bfa0 100644 --- a/packages/embedder/indexing/meetingTemplate.ts +++ b/packages/embedder/indexing/meetingTemplate.ts @@ -4,6 +4,8 @@ import PokerTemplate from '../../server/database/types/PokerTemplate' import ReflectTemplate from '../../server/database/types/ReflectTemplate' import {inferLanguage} from '../inferLanguage' +const MIN_TEXT_LENGTH = 10 + const createTextFromRetrospectiveMeetingTemplate = async ( template: ReflectTemplate, dataLoader: DataLoaderInstance @@ -14,15 +16,21 @@ const createTextFromRetrospectiveMeetingTemplate = async ( return `${question}\n${description}` }) .join('\n') - return `${template.name}\nRetrospective\n${promptText}` + const body = `${template.name}\nRetrospective\n${promptText}` + const language = inferLanguage(`${template.name}\n${promptText}`, MIN_TEXT_LENGTH) + return {body, language} } const createTextFromTeamPromptMeetingTemplate = async (template: MeetingTemplate) => { - return `${template.name}\nteam prompt, daily standup, status update` + const body = `${template.name}\nteam prompt, daily standup, status update` + const language = inferLanguage(template.name, MIN_TEXT_LENGTH) + return {body, language} } const createTextFromActionMeetingTemplate = async (template: MeetingTemplate) => { - return `${template.name}\ncheck-in, action, task, todo, follow-up` + const body = `${template.name}\ncheck-in, action, task, todo, follow-up` + const language = inferLanguage(template.name, MIN_TEXT_LENGTH) + return {body, language} } const createTextFromPokerMeetingTemplate = async ( @@ -39,7 +47,9 @@ const createTextFromPokerMeetingTemplate = async ( }) ) ).join('\n') - return `${template.name}\nplanning poker, sprint poker, estimation\n${dimensionsText}` + const body = `${template.name}\nplanning poker, sprint poker, estimation\n${dimensionsText}` + const language = inferLanguage(`${template.name}\n${dimensionsText}`, MIN_TEXT_LENGTH) + return {body, language} } export const createTextFromMeetingTemplate = async ( @@ -47,21 +57,16 @@ export const createTextFromMeetingTemplate = async ( dataLoader: DataLoaderInstance ) => { const template = await dataLoader.get('meetingTemplates').load(templateId) - const body = await (() => { - switch (template?.type) { - case 'retrospective': - return createTextFromRetrospectiveMeetingTemplate(template, dataLoader) - case 'teamPrompt': - return createTextFromTeamPromptMeetingTemplate(template) - case 'action': - return createTextFromActionMeetingTemplate(template) - case 'poker': - return createTextFromPokerMeetingTemplate(template, dataLoader) - default: - return '' - } - })() - - const language = inferLanguage(body) - return {body, language} + switch (template?.type) { + case 'retrospective': + return createTextFromRetrospectiveMeetingTemplate(template, dataLoader) + case 'teamPrompt': + return createTextFromTeamPromptMeetingTemplate(template) + case 'action': + return createTextFromActionMeetingTemplate(template) + case 'poker': + return createTextFromPokerMeetingTemplate(template, dataLoader) + default: + return {body: '', language: undefined} + } } diff --git a/packages/embedder/jest.config.js b/packages/embedder/jest.config.js new file mode 100644 index 00000000000..a31ec7eec65 --- /dev/null +++ b/packages/embedder/jest.config.js @@ -0,0 +1,21 @@ +const tsJestPresets = require('ts-jest/presets') + +module.exports = { + testEnvironment: 'node', + transform: { + '^.+\\.tsx?$': [ + 'ts-jest', + { + diagnostics: false + } + ] + }, + modulePaths: ['/packages/'], + moduleNameMapper: { + 'server/(.*)': ['/$1'], + 'parabol-client/(.*)': ['/../client/$1'], + '~/(.*)': ['/../client/$1'] + }, + testRegex: '/__tests__/.*.test\\.ts?$', + clearMocks: true +} diff --git a/packages/embedder/package.json b/packages/embedder/package.json index d99f9a662d0..29ab04cf812 100644 --- a/packages/embedder/package.json +++ b/packages/embedder/package.json @@ -15,6 +15,7 @@ "lint:check": "eslint . --ext .ts,.tsx", "prettier": "prettier --config ../../.prettierrc --write \"**/*.{ts,tsx}\"", "prettier:check": "prettier --config ../../.prettierrc --check \"**/*.{ts,tsx}\"", + "test": "jest --verbose", "typecheck": "yarn tsc --noEmit -p tsconfig.json" }, "bugs": { @@ -26,8 +27,12 @@ "@types/franc": "^5.0.3", "@types/node": "^16.11.62", "babel-plugin-inline-import": "^3.0.0", + "jest": "^29.5.0", + "jest-extended": "^3.2.4", + "jest-junit": "^16.0.0", "openapi-fetch": "^0.9.3", "sucrase": "^3.32.0", + "ts-jest": "^29.1.0", "ts-node-dev": "^1.0.0-pre.44", "typescript": "^5.3.3" },