diff --git a/lib/shared/src/experimentation/FeatureFlagProvider.ts b/lib/shared/src/experimentation/FeatureFlagProvider.ts
index 366f03bc94f..d64e4c31369 100644
--- a/lib/shared/src/experimentation/FeatureFlagProvider.ts
+++ b/lib/shared/src/experimentation/FeatureFlagProvider.ts
@@ -24,13 +24,13 @@ export enum FeatureFlag {
     CodyAutocompleteDeepseekV2LiteBase = 'cody-autocomplete-deepseek-v2-lite-base',

     // Enable various feature flags to experiment with FIM trained fine-tuned models via Fireworks
-    CodyAutocompleteFIMModelExperimentBaseFeatureFlag = 'cody-autocomplete-fim-model-experiment-flag',
-    CodyAutocompleteFIMModelExperimentControl = 'cody-autocomplete-fim-model-experiment-control',
-    CodyAutocompleteFIMModelExperimentCurrentBest = 'cody-autocomplete-fim-model-experiment-current-best',
-    CodyAutocompleteFIMModelExperimentVariant1 = 'cody-autocomplete-fim-model-experiment-variant-1',
-    CodyAutocompleteFIMModelExperimentVariant2 = 'cody-autocomplete-fim-model-experiment-variant-2',
-    CodyAutocompleteFIMModelExperimentVariant3 = 'cody-autocomplete-fim-model-experiment-variant-3',
-    CodyAutocompleteFIMModelExperimentVariant4 = 'cody-autocomplete-fim-model-experiment-variant-4',
+    CodyAutocompleteFIMModelExperimentBaseFeatureFlag = 'cody-autocomplete-fim-model-experiment-flag-v1',
+    CodyAutocompleteFIMModelExperimentControl = 'cody-autocomplete-fim-model-experiment-control-v1',
+    CodyAutocompleteFIMModelExperimentCurrentBest = 'cody-autocomplete-fim-model-experiment-current-best-v1',
+    CodyAutocompleteFIMModelExperimentVariant1 = 'cody-autocomplete-fim-model-experiment-variant-1-v1',
+    CodyAutocompleteFIMModelExperimentVariant2 = 'cody-autocomplete-fim-model-experiment-variant-2-v1',
+    CodyAutocompleteFIMModelExperimentVariant3 = 'cody-autocomplete-fim-model-experiment-variant-3-v1',
+    CodyAutocompleteFIMModelExperimentVariant4 = 'cody-autocomplete-fim-model-experiment-variant-4-v1',

     // Enables Claude 3 if the user is in our holdout group
     CodyAutocompleteClaude3 = 'cody-autocomplete-claude-3',
diff --git a/vscode/src/completions/providers/create-provider.ts b/vscode/src/completions/providers/create-provider.ts
index 9601e894b8b..acc3b9bf108 100644
--- a/vscode/src/completions/providers/create-provider.ts
+++ b/vscode/src/completions/providers/create-provider.ts
@@ -20,12 +20,11 @@ import {
 import { createProviderConfig as createExperimentalOllamaProviderConfig } from './experimental-ollama'
 import { createProviderConfig as createExperimentalOpenAICompatibleProviderConfig } from './expopenaicompatible'
 import {
-    CODE_QWEN_7B,
-    DEEPSEEK_CODER_7B,
     DEEPSEEK_CODER_V2_LITE_BASE,
-    FIREWORKS_DEEPSEEK_7B_LANG_LOG_FINETUNED,
-    FIREWORKS_DEEPSEEK_7B_LANG_STACK_FINETUNED,
-    FIREWORKS_FIM_FINE_TUNED_MODEL_HYBRID,
+    DEEPSEEK_CODER_V2_LITE_BASE_WINDOW_4096,
+    DEEPSEEK_CODER_V2_LITE_BASE_WINDOW_8192,
+    DEEPSEEK_CODER_V2_LITE_BASE_WINDOW_16384,
+    DEEPSEEK_CODER_V2_LITE_BASE_WINDOW_32768,
     type FireworksOptions,
     createProviderConfig as createFireworksProviderConfig,
 } from './fireworks'
@@ -222,29 +221,28 @@ async function resolveFIMModelExperimentFromFeatureFlags(): ReturnType<
             FeatureFlag.CodyAutocompleteFIMModelExperimentCurrentBest
         ),
     ])
-
     if (fimModelVariant1) {
         // Variant 1: Current production model with +200msec latency to quantify the effect of latency increase while keeping same quality
-        return { provider: 'fireworks', model: DEEPSEEK_CODER_V2_LITE_BASE }
+        return { provider: 'fireworks', model: DEEPSEEK_CODER_V2_LITE_BASE_WINDOW_4096 }
     }
     if (fimModelVariant2) {
-        return { provider: 'fireworks', model: FIREWORKS_DEEPSEEK_7B_LANG_LOG_FINETUNED }
+        return { provider: 'fireworks', model: DEEPSEEK_CODER_V2_LITE_BASE_WINDOW_8192 }
     }
     if (fimModelVariant3) {
-        return { provider: 'fireworks', model: CODE_QWEN_7B }
+        return { provider: 'fireworks', model: DEEPSEEK_CODER_V2_LITE_BASE_WINDOW_16384 }
     }
     if (fimModelVariant4) {
-        return { provider: 'fireworks', model: FIREWORKS_DEEPSEEK_7B_LANG_STACK_FINETUNED }
+        return { provider: 'fireworks', model: DEEPSEEK_CODER_V2_LITE_BASE_WINDOW_32768 }
     }
     if (fimModelCurrentBest) {
-        return { provider: 'fireworks', model: DEEPSEEK_CODER_7B }
+        return { provider: 'fireworks', model: DEEPSEEK_CODER_V2_LITE_BASE }
     }
     if (fimModelControl) {
         // Current production model
-        return { provider: 'fireworks', model: 'starcoder-hybrid' }
+        return { provider: 'fireworks', model: DEEPSEEK_CODER_V2_LITE_BASE }
     }
     // Extra free traffic - redirect to the current production model which could be different than control
-    return { provider: 'fireworks', model: 'starcoder-hybrid' }
+    return { provider: 'fireworks', model: DEEPSEEK_CODER_V2_LITE_BASE }
 }

 async function resolveDefaultModelFromVSCodeConfigOrFeatureFlags(
@@ -257,24 +255,16 @@ async function resolveDefaultModelFromVSCodeConfigOrFeatureFlags(
     if (configuredProvider) {
         return { provider: configuredProvider }
     }
-
-    const [
-        starCoder2Hybrid,
-        starCoderHybrid,
-        claude3,
-        finetunedFIMModelHybrid,
-        fimModelExperimentFlag,
-        deepseekV2LiteBase,
-    ] = await Promise.all([
-        featureFlagProvider.evaluateFeatureFlag(FeatureFlag.CodyAutocompleteStarCoder2Hybrid),
-        featureFlagProvider.evaluateFeatureFlag(FeatureFlag.CodyAutocompleteStarCoderHybrid),
-        featureFlagProvider.evaluateFeatureFlag(FeatureFlag.CodyAutocompleteClaude3),
-        featureFlagProvider.evaluateFeatureFlag(FeatureFlag.CodyAutocompleteFIMFineTunedModelHybrid),
-        featureFlagProvider.evaluateFeatureFlag(
-            FeatureFlag.CodyAutocompleteFIMModelExperimentBaseFeatureFlag
-        ),
-        featureFlagProvider.evaluateFeatureFlag(FeatureFlag.CodyAutocompleteDeepseekV2LiteBase),
-    ])
+    const [starCoder2Hybrid, starCoderHybrid, claude3, fimModelExperimentFlag, deepseekV2LiteBase] =
+        await Promise.all([
+            featureFlagProvider.evaluateFeatureFlag(FeatureFlag.CodyAutocompleteStarCoder2Hybrid),
+            featureFlagProvider.evaluateFeatureFlag(FeatureFlag.CodyAutocompleteStarCoderHybrid),
+            featureFlagProvider.evaluateFeatureFlag(FeatureFlag.CodyAutocompleteClaude3),
+            featureFlagProvider.evaluateFeatureFlag(
+                FeatureFlag.CodyAutocompleteFIMModelExperimentBaseFeatureFlag
+            ),
+            featureFlagProvider.evaluateFeatureFlag(FeatureFlag.CodyAutocompleteDeepseekV2LiteBase),
+        ])

     // We run fine tuning experiment for VSC client only.
     // We disable for all agent clients like the JetBrains plugin.
@@ -286,15 +276,9 @@ async function resolveDefaultModelFromVSCodeConfigOrFeatureFlags(
         // The traffic in this feature flag is interpreted as a traffic allocated to the fine-tuned experiment.
         return resolveFIMModelExperimentFromFeatureFlags()
     }
-
     if (isDotCom && deepseekV2LiteBase) {
         return { provider: 'fireworks', model: DEEPSEEK_CODER_V2_LITE_BASE }
     }
-
-    if (finetunedFIMModelHybrid) {
-        return { provider: 'fireworks', model: FIREWORKS_FIM_FINE_TUNED_MODEL_HYBRID }
-    }
-
     if (starCoder2Hybrid) {
         return { provider: 'fireworks', model: 'starcoder2-hybrid' }
     }
diff --git a/vscode/src/completions/providers/fireworks.ts b/vscode/src/completions/providers/fireworks.ts
index 1ca828df5e1..726a38b6096 100644
--- a/vscode/src/completions/providers/fireworks.ts
+++ b/vscode/src/completions/providers/fireworks.ts
@@ -93,6 +93,13 @@ export const DEEPSEEK_CODER_1P3_B = 'deepseek-coder-1p3b'
 export const DEEPSEEK_CODER_7B = 'deepseek-coder-7b'
 // Huggingface link (https://huggingface.co/deepseek-ai/DeepSeek-Coder-V2-Lite-Base)
 export const DEEPSEEK_CODER_V2_LITE_BASE = 'deepseek-coder-v2-lite-base'
+
+// Context window experiments with DeepSeek Model
+export const DEEPSEEK_CODER_V2_LITE_BASE_WINDOW_4096 = 'deepseek-coder-v2-lite-base-context-4096'
+export const DEEPSEEK_CODER_V2_LITE_BASE_WINDOW_8192 = 'deepseek-coder-v2-lite-base-context-8192'
+export const DEEPSEEK_CODER_V2_LITE_BASE_WINDOW_16384 = 'deepseek-coder-v2-lite-base-context-16384'
+export const DEEPSEEK_CODER_V2_LITE_BASE_WINDOW_32768 = 'deepseek-coder-v2-lite-base-context-32768'
+
 // Huggingface link (https://huggingface.co/Qwen/CodeQwen1.5-7B)
 export const CODE_QWEN_7B = 'code-qwen-7b'
@@ -121,6 +128,12 @@ const MODEL_MAP = {
     [DEEPSEEK_CODER_7B]: 'fireworks/accounts/sourcegraph/models/deepseek-coder-7b-base',
     [DEEPSEEK_CODER_V2_LITE_BASE]: 'accounts/sourcegraph/models/deepseek-coder-v2-lite-base',
     [CODE_QWEN_7B]: 'accounts/sourcegraph/models/code-qwen-1p5-7b',
+    [DEEPSEEK_CODER_V2_LITE_BASE_WINDOW_4096]: 'accounts/sourcegraph/models/deepseek-coder-v2-lite-base',
+    [DEEPSEEK_CODER_V2_LITE_BASE_WINDOW_8192]: 'accounts/sourcegraph/models/deepseek-coder-v2-lite-base',
+    [DEEPSEEK_CODER_V2_LITE_BASE_WINDOW_16384]:
+        'accounts/sourcegraph/models/deepseek-coder-v2-lite-base',
+    [DEEPSEEK_CODER_V2_LITE_BASE_WINDOW_32768]:
+        'accounts/sourcegraph/models/deepseek-coder-v2-lite-base',
 }

 type FireworksModel =
@@ -157,6 +170,14 @@ function getMaxContextTokens(model: FireworksModel): number {
         case CODE_QWEN_7B: {
             return 2048
         }
+        case DEEPSEEK_CODER_V2_LITE_BASE_WINDOW_4096:
+            return 4096
+        case DEEPSEEK_CODER_V2_LITE_BASE_WINDOW_8192:
+            return 8192
+        case DEEPSEEK_CODER_V2_LITE_BASE_WINDOW_16384:
+            return 16384
+        case DEEPSEEK_CODER_V2_LITE_BASE_WINDOW_32768:
+            return 32768
         default:
             return 1200
     }
 }
@@ -550,7 +571,6 @@ class FireworksProvider extends Provider {
             languageId: self.options.document.languageId,
             anonymousUserID: self.anonymousUserID,
         }
-        const headers = new Headers(self.getCustomHeaders())
         // Force HTTP connection reuse to reduce latency.
         // c.f. https://github.com/microsoft/vscode/issues/173861
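Reviewer note (not part of the change): a minimal standalone TypeScript sketch of the mapping this diff sets up. All four new context-window variants point at the same upstream Fireworks deployment in MODEL_MAP; only the client-side token budget returned by getMaxContextTokens differs, with 1200 as the existing default for unmatched models. The map and helper names below are hypothetical; the model ids and numbers are copied from the diff above.

// Standalone sketch with assumed names; mirrors the intent of the new getMaxContextTokens cases.
const CONTEXT_WINDOW_BY_MODEL: Record<string, number> = {
    'deepseek-coder-v2-lite-base-context-4096': 4096,
    'deepseek-coder-v2-lite-base-context-8192': 8192,
    'deepseek-coder-v2-lite-base-context-16384': 16384,
    'deepseek-coder-v2-lite-base-context-32768': 32768,
}

// Hypothetical helper name; in the PR the real logic lives in the switch inside getMaxContextTokens.
function maxContextTokensFor(model: string): number {
    return CONTEXT_WINDOW_BY_MODEL[model] ?? 1200
}

// Example: the 16384 variant yields a 16384-token budget; ids not in the map fall back to 1200.
console.log(maxContextTokensFor('deepseek-coder-v2-lite-base-context-16384')) // 16384
console.log(maxContextTokensFor('some-unknown-model')) // 1200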