diff --git a/modules/openvino_code/DEVELOPER.md b/modules/openvino_code/DEVELOPER.md
new file mode 100644
index 000000000..84a286ee0
--- /dev/null
+++ b/modules/openvino_code/DEVELOPER.md
@@ -0,0 +1,75 @@
+# OpenVINO Code - VSCode extension for AI code completion with OpenVINO™
+
+VSCode extension that helps developers write code with an AI code assistant. OpenVINO Code works with a Large Language Model for Code (Code LLM) deployed on a local or remote server.
+
+## Installing Extension
+
+The VSCode extension can be installed from a built `*.vsix` file:
+
+1. Open the `Extensions` side bar in VSCode.
+2. Click the menu icon (the three-dots "meatballs" icon) in the top right corner of the Extensions side panel.
+3. Select the "Install from VSIX..." option and choose the extension file.
+
+For instructions on how to build the extension `vsix` file, please refer to the [Build Extension](#build-extension) section.
+
+## Extension Configuration
+
+To work with the extension, configure the endpoint of the server hosting the Code LLM to which requests will be sent:
+
+1. Open the extension settings.
+2. Fill in the `Server URL` parameter with the server endpoint URL.
+
+For instructions on how to start the server locally, please refer to the [server README.md](./server/README.md).
+
+You can also configure special tokens in the extension settings.
+
+## Working with Extension
+
+TBD
+
+1. Create a new Python file
+2. Try typing `def main():`
+3. Press the shortcut `ctrl+alt+space` for code completion
+
+### Checking output
+
+You can see the input to and output from the code generation API:
+
+1. Open the VSCode `OUTPUT` panel
+2. Select the extension output source from the dropdown menu
+
+## Developing
+
+> **Prerequisite:** You should have `Node.js` installed (v16 and above).
+
+#### Install dependencies
+
+To install dependencies, run the following command from the project root directory:
+
+```
+npm install
+```
+
+#### Run Extension from Source & Debugging
+
+Open the `Run and Debug` side bar in VSCode and click `Launch Extension` (or press `F5`).
+
+#### Build Extension
+
+To build the extension and generate a `*.vsix` file for installation in VSCode, run the following command:
+
+```
+npm run vsce:package
+```
+
+#### Linting
+
+To perform linting with `ESLint`, execute the following command:
+
+```
+npm run lint
+```
+
+#### Testing
+
+TBD
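The `Server URL` setting above can be sanity-checked without the extension. Below is a minimal TypeScript sketch, assuming the server exposes `POST <Server URL>/api/generate` with the request and response shapes that appear later in this diff (`IGenerateRequest` in `backend.service.ts`); the URL and prompt are illustrative.

```typescript
// Minimal sanity check for the configured Server URL.
// Assumes POST /api/generate with the IGenerateRequest shape from backend.service.ts.
const serverUrl = 'http://127.0.0.1:8000'; // hypothetical local server

async function checkServer(): Promise<void> {
  const response = await fetch(`${serverUrl}/api/generate`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({
      inputs: 'def main():',
      parameters: { temperature: 0.2, top_k: 10, top_p: 1, min_new_tokens: 1, max_new_tokens: 100 },
    }),
  });
  // The non-streaming endpoint returns a JSON body containing the generated text.
  const { generated_text } = (await response.json()) as { generated_text: string };
  console.log(generated_text);
}

void checkServer();
```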
diff --git a/modules/openvino_code/README.md b/modules/openvino_code/README.md
index 6558cf465..6fd9be3df 100644
--- a/modules/openvino_code/README.md
+++ b/modules/openvino_code/README.md
@@ -13,7 +13,7 @@ OpenVINO Code provides the following features:
 
 1. Create a new python file
 2. Try typing `def main():`
-3. Press shortcut buttons (TBD) for code completion
+3. Press the shortcut `ctrl+alt+space` for code completion
 
 ### Checking output
 
diff --git a/modules/openvino_code/openvino-code-completion-0.0.2.vsix b/modules/openvino_code/openvino-code-completion-0.0.2.vsix
deleted file mode 100644
index cb7d8c923..000000000
Binary files a/modules/openvino_code/openvino-code-completion-0.0.2.vsix and /dev/null differ
diff --git a/modules/openvino_code/openvino-code-completion-0.0.3.vsix b/modules/openvino_code/openvino-code-completion-0.0.3.vsix
new file mode 100644
index 000000000..5fa0d2672
Binary files /dev/null and b/modules/openvino_code/openvino-code-completion-0.0.3.vsix differ
diff --git a/modules/openvino_code/package-lock.json b/modules/openvino_code/package-lock.json
index e07168a2c..73ae54383 100644
--- a/modules/openvino_code/package-lock.json
+++ b/modules/openvino_code/package-lock.json
@@ -1,12 +1,12 @@
 {
   "name": "openvino-code-completion",
-  "version": "0.0.2",
+  "version": "0.0.3",
   "lockfileVersion": 2,
   "requires": true,
   "packages": {
     "": {
       "name": "openvino-code-completion",
-      "version": "0.0.2",
+      "version": "0.0.3",
       "license": "https://github.com/openvinotoolkit/openvino_contrib/blob/master/LICENSE",
       "workspaces": [
         "side-panel-ui"
diff --git a/modules/openvino_code/package.json b/modules/openvino_code/package.json
index 95888d54f..af027523a 100644
--- a/modules/openvino_code/package.json
+++ b/modules/openvino_code/package.json
@@ -1,7 +1,7 @@
 {
   "publisher": "OpenVINO",
   "name": "openvino-code-completion",
-  "version": "0.0.2",
+  "version": "0.0.3",
   "displayName": "OpenVINO Code Completion",
   "description": "VSCode extension for AI code completion with OpenVINO",
   "icon": "media/logo.png",
@@ -188,38 +188,44 @@
          "default": 30,
          "markdownDescription": "Server request timeout in seconds after which request will be aborted."
        },
-       "openvinoCode.fillInTheMiddleMode": {
+       "openvinoCode.streamInlineCompletion": {
          "order": 3,
          "type": "boolean",
+         "default": false,
+         "description": "When checked, inline completion will be generated in streaming mode."
+       },
+       "openvinoCode.fillInTheMiddleMode": {
+         "order": 4,
+         "type": "boolean",
          "default": false,
          "markdownDescription": "When checked, text before (above) and after (below) the cursor will be used for completion generation. When unchecked, only text before (above) the cursor will be used."
        },
        "openvinoCode.temperature": {
-         "order": 4,
+         "order": 5,
          "type": "number",
          "default": 0.2,
-         "description": "Sampling temperature."
+         "description": "Non-zero value. Higher values produce more diverse code suggestions, while lower values emphasize the most likely words."
        },
        "openvinoCode.topK": {
-         "order": 4,
+         "order": 5,
          "type": "integer",
          "default": 10,
-         "description": "Top K."
+         "description": "Select the next word during suggestion generation from the top K candidates. Improves diversity of generated suggestions."
        },
        "openvinoCode.topP": {
-         "order": 4,
+         "order": 5,
          "type": "number",
          "default": 1,
-         "description": "Top P."
+         "description": "A value between 0 and 1. Similar to Top K, it adjusts the number of candidate words based on their probability. Candidates will be added for selection until the cumulative probability exceeds P."
        },
        "openvinoCode.minNewTokens": {
-         "order": 5,
+         "order": 6,
          "type": "number",
          "default": 1,
          "description": "Minimum number of newly generated tokens."
        },
        "openvinoCode.maxNewTokens": {
-         "order": 5,
+         "order": 6,
          "type": "number",
          "default": 100,
          "description": "Maximum number of newly generated tokens."
@@ -280,6 +286,12 @@
        "key": "ctrl+alt+space",
        "mac": "ctrl+alt+space",
        "when": "editorTextFocus"
+      },
+      {
+        "command": "openvinoCode.stopGeneration",
+        "key": "escape",
+        "mac": "escape",
+        "when": "openvinoCode.generating"
      }
    ]
  },
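The three sampling settings above interact. Below is a toy TypeScript sketch, purely illustrative (the real sampling happens server-side in the `transformers` generation loop), showing how temperature, Top K, and Top P successively reshape and prune a next-token distribution.

```typescript
// Toy illustration of the sampling settings; names and values are hypothetical.
function sampleNextToken(logits: number[], temperature: number, topK: number, topP: number): number {
  // Temperature rescales logits: low values sharpen, high values flatten the distribution.
  const scaled = logits.map((l) => l / temperature);
  const maxLogit = Math.max(...scaled);
  const exp = scaled.map((l) => Math.exp(l - maxLogit));
  const sum = exp.reduce((a, b) => a + b, 0);

  // Top K: keep only the K most likely tokens.
  let candidates = exp
    .map((e, index) => ({ index, p: e / sum }))
    .sort((a, b) => b.p - a.p)
    .slice(0, topK);

  // Top P: keep candidates until the cumulative probability exceeds P.
  let cumulative = 0;
  candidates = candidates.filter(({ p }) => (cumulative += p) - p < topP);

  // Renormalize and draw one token.
  const total = candidates.reduce((a, c) => a + c.p, 0);
  let r = Math.random() * total;
  for (const c of candidates) {
    if ((r -= c.p) <= 0) return c.index;
  }
  return candidates[candidates.length - 1].index;
}
```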
diff --git a/modules/openvino_code/server/pyproject.toml b/modules/openvino_code/server/pyproject.toml
index 54d7a726b..d8f98a281 100644
--- a/modules/openvino_code/server/pyproject.toml
+++ b/modules/openvino_code/server/pyproject.toml
@@ -4,22 +4,21 @@
 version = "0.0.1"
 requires-python = ">=3.8"
 
 dependencies = [
-    'fastapi==0.101.0',
-    'uvicorn==0.23.1',
+    'fastapi==0.103.1',
+    'uvicorn==0.23.2',
     'torch @ https://download.pytorch.org/whl/cpu-cxx11-abi/torch-2.0.1%2Bcpu.cxx11.abi-cp38-cp38-linux_x86_64.whl ; sys_platform=="linux" and python_version == "3.8"',
     'torch @ https://download.pytorch.org/whl/cpu-cxx11-abi/torch-2.0.1%2Bcpu.cxx11.abi-cp39-cp39-linux_x86_64.whl ; sys_platform=="linux" and python_version == "3.9"',
     'torch @ https://download.pytorch.org/whl/cpu-cxx11-abi/torch-2.0.1%2Bcpu.cxx11.abi-cp310-cp310-linux_x86_64.whl ; sys_platform=="linux" and python_version == "3.10"',
     'torch @ https://download.pytorch.org/whl/cpu-cxx11-abi/torch-2.0.1%2Bcpu.cxx11.abi-cp311-cp311-linux_x86_64.whl ; sys_platform=="linux" and python_version == "3.11"',
     'torch ; sys_platform != "linux"',
     'openvino==2023.1.0.dev20230811',
-    'optimum-intel[openvino]==1.11.0',
+    'transformers==4.31.0',
+    'optimum==1.12.0',
+    'optimum-intel[openvino]==1.10.1',
 ]
 
 [project.optional-dependencies]
-dev = [
-    "black",
-    "ruff",
-]
+dev = ["black", "ruff"]
 
 [build-system]
 requires = ["setuptools>=43.0.0", "wheel"]
diff --git a/modules/openvino_code/server/src/app.py b/modules/openvino_code/server/src/app.py
index 2fb061a1a..82b5717fa 100644
--- a/modules/openvino_code/server/src/app.py
+++ b/modules/openvino_code/server/src/app.py
@@ -1,9 +1,9 @@
 from time import perf_counter
 from typing import Dict, Union
 
-from fastapi import Depends, FastAPI
+from fastapi import Depends, FastAPI, Request
 from fastapi.responses import RedirectResponse, StreamingResponse
-from pydantic import BaseModel, Field
+from pydantic import BaseModel, Field, TypeAdapter
 
 from src.generators import GeneratorFunctor
 from src.utils import get_logger
@@ -105,11 +105,12 @@ async def generate(
 
 @app.post("/api/generate_stream", status_code=200)
 async def generate_stream(
-    request: GenerationRequest,
+    request: Request,
     generator: GeneratorFunctor = Depends(get_generator_dummy),
 ) -> StreamingResponse:
-    logger.info(request)
-    return StreamingResponse(generator.generate_stream(request.inputs, request.parameters.model_dump()))
+    generation_request = TypeAdapter(GenerationRequest).validate_python(await request.json())
+    logger.info(generation_request)
+    return StreamingResponse(generator.generate_stream(generation_request.inputs, generation_request.parameters.model_dump(), request))
 
 
 @app.post("/api/summarize", status_code=200, response_model=GenerationResponse)
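With the route now accepting a raw `Request` and validating the body via `TypeAdapter`, the payload schema no longer appears in the endpoint signature, so clients must keep their payloads in sync by hand. A sketch of the wire contract in TypeScript, with field names taken from `IGenerateRequest` in `backend.service.ts` and defaults from `package.json`; the server's `GenerationRequest` model is assumed to match.

```typescript
// Wire contract for /api/generate_stream as the extension sends it.
// Field names from IGenerateRequest; the server-side GenerationRequest is assumed to match.
interface IGenerationParameters {
  temperature: number;
  top_k: number;
  top_p: number;
  min_new_tokens: number;
  max_new_tokens: number;
}

interface IGenerateStreamPayload {
  inputs: string;
  parameters: IGenerationParameters;
}

// Example payload mirroring the defaults declared in package.json:
const payload: IGenerateStreamPayload = {
  inputs: 'def main():',
  parameters: { temperature: 0.2, top_k: 10, top_p: 1, min_new_tokens: 1, max_new_tokens: 100 },
};
```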
diff --git a/modules/openvino_code/server/src/generators.py b/modules/openvino_code/server/src/generators.py
index ee1ef6b2c..0b9f94b70 100644
--- a/modules/openvino_code/server/src/generators.py
+++ b/modules/openvino_code/server/src/generators.py
@@ -1,3 +1,4 @@
+import asyncio
 import re
 from functools import lru_cache
 from io import StringIO
@@ -6,6 +7,7 @@
 from typing import Any, Callable, Container, Dict, Generator, List, Optional, Type, Union
 
 import torch
+from fastapi import Request
 from huggingface_hub.utils import EntryNotFoundError
 from optimum.intel import OVModelForCausalLM, OVModelForSeq2SeqLM
 from transformers import (
@@ -61,11 +63,15 @@ def get_model(checkpoint: str, device: str = "CPU") -> OVModel:
     return model
 
 
+# TODO: The generator needs a running flag, or cancellation of in-flight generation on a new request.
+# The generator cannot handle concurrent requests - it fails and stalls the process with:
+# RuntimeError: Exception from src/inference/src/infer_request.cpp:189:
+# [ REQUEST_BUSY ]
 class GeneratorFunctor:
     def __call__(self, input_text: str, parameters: Dict[str, Any]) -> str:
         raise NotImplementedError
 
-    async def generate_stream(self, input_text: str, parameters: Dict[str, Any]):
+    async def generate_stream(self, input_text: str, parameters: Dict[str, Any], request: Request):
         raise NotImplementedError
 
     def summarize(self, input_text: str, template: str, signature: str, style: str, parameters: Dict[str, Any]):
@@ -122,24 +128,45 @@ def __call__(
         logger.info(f"Number of input tokens: {prompt_len}; generated {len(output_ids)} tokens")
         return self.tokenizer.decode(output_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)
 
-    async def generate_stream(
-        self, input_text: str, parameters: Dict[str, Any], stopping_criteria: Optional[StoppingCriteriaList] = None
-    ):
+    async def generate_stream(self, input_text: str, parameters: Dict[str, Any], request: Request = None):
         input_ids = self.tokenizer.encode(input_text, return_tensors="pt")
         streamer = TextIteratorStreamer(self.tokenizer, skip_prompt=True, skip_special_tokens=True)
         parameters["streamer"] = streamer
         config = GenerationConfig.from_dict({**self.generation_config.to_dict(), **parameters})
+
+        stop_on_tokens = StopOnTokens([])
+
         generation_kwargs = dict(
             input_ids=input_ids,
             streamer=streamer,
-            stopping_criteria=stopping_criteria,
+            stopping_criteria=StoppingCriteriaList([stop_on_tokens]),
             **config.to_dict(),
         )
+
+        # Listen for the disconnect event so generation can be stopped
+        def listen_for_disconnect():
+            async def listen():
+                message = await request.receive()
+                if message.get("type") == "http.disconnect":
+                    stop_on_tokens.cancelled = True
+
+            asyncio.create_task(listen())
+
+        listen_thread = Thread(target=listen_for_disconnect)
+        # thread.run() doesn't actually start a new thread;
+        # it runs the thread function in the current thread context.
+        # thread.start() doesn't work here.
+        listen_thread.run()
+
         thread = Thread(target=self.model.generate, kwargs=generation_kwargs)
         thread.start()
+
         for token in streamer:
+            await asyncio.sleep(0.01)
             yield token
 
+        thread.join()
+
     def generate_between(
         self,
         input_parts: List[str],
@@ -243,7 +270,10 @@ def inner() -> GeneratorFunctor:
 
 class StopOnTokens(StoppingCriteria):
     def __init__(self, token_ids: List[int]) -> None:
+        self.cancelled = False
         self.token_ids = torch.tensor(token_ids, requires_grad=False)
 
     def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
+        if self.cancelled:
+            return True
         return torch.any(torch.eq(input_ids[0, -1], self.token_ids)).item()
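Server-side cancellation only fires when the client actually drops the connection. Below is a hedged sketch of the extension-side wiring implied by the new `openvinoCode.stopGeneration` command and the `openvinoCode.generating` context key elsewhere in this diff; the `AbortController` plumbing, import path, and helper names are illustrative, not the extension's actual code.

```typescript
// Hypothetical wiring for the stopGeneration command and the generating context key.
import { commands, ExtensionContext } from 'vscode';
import { COMMANDS, EXTENSION_CONTEXT_STATE } from '../constants';

let abortController: AbortController | undefined;

export function registerStopGeneration(context: ExtensionContext): void {
  // Escape is bound to this command while "openvinoCode.generating" is set.
  context.subscriptions.push(
    commands.registerCommand(COMMANDS.STOP_GENERATION, () => abortController?.abort())
  );
}

export async function runGeneration(generate: (signal: AbortSignal) => Promise<void>): Promise<void> {
  abortController = new AbortController();
  // Enables the keybinding guarded by "when": "openvinoCode.generating".
  await commands.executeCommand('setContext', EXTENSION_CONTEXT_STATE.GENERATING, true);
  try {
    // Aborting closes the HTTP stream; the server then sees http.disconnect.
    await generate(abortController.signal);
  } finally {
    await commands.executeCommand('setContext', EXTENSION_CONTEXT_STATE.GENERATING, false);
    abortController = undefined;
  }
}
```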
diff --git a/modules/openvino_code/src/configuration.ts b/modules/openvino_code/src/configuration.ts
index ea7a77a6f..9266a2c5f 100644
--- a/modules/openvino_code/src/configuration.ts
+++ b/modules/openvino_code/src/configuration.ts
@@ -8,6 +8,7 @@ export type CustomConfiguration = {
   model: ModelName;
   serverUrl: string;
   serverRequestTimeout: number;
+  streamInlineCompletion: boolean;
   fillInTheMiddleMode: boolean;
   temperature: number;
   topK: number;
diff --git a/modules/openvino_code/src/constants.ts b/modules/openvino_code/src/constants.ts
index dd817e721..b9c8edd5a 100644
--- a/modules/openvino_code/src/constants.ts
+++ b/modules/openvino_code/src/constants.ts
@@ -24,4 +24,9 @@
   STOP_SERVER_NATIVE: 'openvinoCode.stopServerNative',
   SHOW_SERVER_LOG: 'openvinoCode.showServerLog',
   SHOW_EXTENSION_LOG: 'openvinoCode.showExtensionLog',
+  STOP_GENERATION: 'openvinoCode.stopGeneration',
+};
+
+export const EXTENSION_CONTEXT_STATE = {
+  GENERATING: 'openvinoCode.generating',
 };
diff --git a/modules/openvino_code/src/inline-completion/completion.service.ts b/modules/openvino_code/src/inline-completion/completion.service.ts
index 05a0fa64a..935379238 100644
--- a/modules/openvino_code/src/inline-completion/completion.service.ts
+++ b/modules/openvino_code/src/inline-completion/completion.service.ts
@@ -1,7 +1,7 @@
 import { InlineCompletionItem, Position, Range, TextDocument, window } from 'vscode';
-import { backendService } from '../services/backend.service';
-import { extensionState } from '../state';
 import { EXTENSION_DISPLAY_NAME } from '../constants';
+import { IGenerateRequest, backendService } from '../services/backend.service';
+import { extensionState } from '../state';
 
 const outputChannel = window.createOutputChannel(EXTENSION_DISPLAY_NAME, { log: true });
 const logCompletionInput = (input: string): void => outputChannel.append(`Completion input:\n${input}\n\n`);
@@ -67,6 +67,41 @@ class CompletionService {
     const completionItem = new InlineCompletionItem(generatedText, new Range(position, position.translate(0, 1)));
     return [completionItem];
   }
+
+  async getCompletionStream(
+    document: TextDocument,
+    position: Position,
+    onDataChunk: (chunk: string) => unknown,
+    signal?: AbortSignal
+  ) {
+    const textBeforeCursor = this._getTextBeforeCursor(document, position);
+    const textAfterCursor = this._getTextAfterCursor(document, position);
+    const completionInput = this._prepareCompletionInput(textBeforeCursor, textAfterCursor);
+    logCompletionInput(completionInput);
+
+    const { temperature, topK, topP, minNewTokens, maxNewTokens } = extensionState.config;
+
+    const request: IGenerateRequest = {
+      inputs: completionInput,
+      parameters: {
+        temperature,
+        top_k: topK,
+        top_p: topP,
+        min_new_tokens: minNewTokens,
+        max_new_tokens: maxNewTokens,
+      },
+    };
+
+    outputChannel.append(`Completion output:\n`);
+    return backendService.generateCompletionStream(
+      request,
+      (chunk) => {
+        outputChannel.append(chunk);
+        onDataChunk(chunk);
+      },
+      signal
+    );
+  }
 }
 
 export default new CompletionService();
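A hedged usage sketch for `getCompletionStream`: accumulate streamed chunks into a single inline completion item. The provider shape here is illustrative; the real streaming provider lives in `streaming-inline-completion-component.ts`, which is not shown in this diff.

```typescript
// Illustrative consumer of getCompletionStream; not the extension's actual provider.
import { InlineCompletionItem, Position, Range, TextDocument } from 'vscode';
import completionService from './completion.service';

async function provideStreamedItem(document: TextDocument, position: Position): Promise<InlineCompletionItem[]> {
  const controller = new AbortController(); // wire this to a stop command to allow cancellation
  let generatedText = '';
  await completionService.getCompletionStream(
    document,
    position,
    (chunk) => (generatedText += chunk), // called once per streamed token
    controller.signal
  );
  return [new InlineCompletionItem(generatedText, new Range(position, position.translate(0, 1)))];
}
```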
diff --git a/modules/openvino_code/src/inline-completion/index.ts b/modules/openvino_code/src/inline-completion/index.ts
index d7ad34eac..cd0d08781 100644
--- a/modules/openvino_code/src/inline-completion/index.ts
+++ b/modules/openvino_code/src/inline-completion/index.ts
@@ -1,48 +1,39 @@
-import { Disposable, ExtensionContext, commands, languages, window } from 'vscode';
+import { IExtensionState } from '@shared/extension-state';
+import { ExtensionContext } from 'vscode';
 import { IExtensionComponent } from '../extension-component.interface';
-import { CommandInlineCompletionItemProvider } from './command-inline-completion-provider';
-import { COMMANDS } from '../constants';
 import { extensionState } from '../state';
-import { notificationService } from '../services/notification.service';
+import { inlineCompletion as baseInlineCompletion } from './inline-completion-component';
+import { streamingInlineCompletion } from './streaming-inline-completion-component';
 
 class InlineCompletion implements IExtensionComponent {
-  activate(context: ExtensionContext): void {
-    // Register Inline Completion triggered by command
-    const commandInlineCompletionProvider = new CommandInlineCompletionItemProvider();
-
-    let commandInlineCompletionDisposable: Disposable;
-
-    const commandDisposable = commands.registerCommand(COMMANDS.GENERATE_INLINE_COPMLETION, () => {
-      if (!extensionState.get('isServerAvailable')) {
-        notificationService.showServerNotAvailableMessage(extensionState.state);
-        return;
-      }
-      if (extensionState.get('isLoading') && window.activeTextEditor) {
-        void window.showTextDocument(window.activeTextEditor.document);
-        return;
-      }
-
-      extensionState.set('isLoading', true);
-
-      if (commandInlineCompletionDisposable) {
-        commandInlineCompletionDisposable.dispose();
-      }
+  private _context: ExtensionContext | null = null;
+  private _listener = ({ config }: IExtensionState) => this.activateCompletion(config.streamInlineCompletion);
 
-      commandInlineCompletionDisposable = languages.registerInlineCompletionItemProvider(
-        { pattern: '**' },
-        commandInlineCompletionProvider
-      );
-
-      void commandInlineCompletionProvider.triggerCompletion(() => {
-        commandInlineCompletionDisposable.dispose();
-        extensionState.set('isLoading', false);
-      });
-    });
+  activate(context: ExtensionContext): void {
+    this._context = context;
+    this.activateCompletion(extensionState.config.streamInlineCompletion);
+    extensionState.subscribe(this._listener);
+  }
 
-    context.subscriptions.push(commandDisposable);
+  deactivate(): void {
+    streamingInlineCompletion.deactivate();
+    baseInlineCompletion.deactivate();
+    extensionState.unsubscribe(this._listener);
   }
 
-  deactivate(): void {}
+  activateCompletion(streaming: boolean) {
+    if (!this._context) {
+      return;
+    }
+    baseInlineCompletion.deactivate();
+    streamingInlineCompletion.deactivate();
+
+    if (streaming) {
+      streamingInlineCompletion.activate(this._context);
+    } else {
+      baseInlineCompletion.activate(this._context);
+    }
+  }
 }
 
 export const inlineCompletion = new InlineCompletion();
diff --git a/modules/openvino_code/src/services/backend.service.ts b/modules/openvino_code/src/services/backend.service.ts
index 5c251a845..5b260460f 100644
--- a/modules/openvino_code/src/services/backend.service.ts
+++ b/modules/openvino_code/src/services/backend.service.ts
@@ -3,8 +3,9 @@ import { extensionState } from '../state';
 import { notificationService } from './notification.service';
 import { lruCache } from '../lru-cache.decorator';
 import { ConnectionStatus } from '@shared/extension-state';
+import { streamingRequest } from './request';
 
-interface IGenerateRequest {
+export interface IGenerateRequest {
   inputs: string;
   parameters: {
     temperature: number;
@@ -39,7 +40,7 @@ interface RequestOptions {
   timeout: number;
 }
 
-class ServerError extends Error { }
+class ServerError extends Error {}
 
 const skipEmptyGeneratedText = (response: IGenerateResponse | null) => !response?.generated_text.trim();
 
@@ -76,6 +77,20 @@ class BackendService {
     return this._sendRequest(this._endpoints.generate, 'POST', data);
   }
 
+  async generateCompletionStream(
+    data: IGenerateRequest,
+    onDataChunk: (chunk: string) => void,
+    signal?: AbortSignal
+  ): Promise<void> {
+    return streamingRequest(`${this._apiUrl}/generate_stream`, onDataChunk, {
+      method: 'POST',
+      timeout: this._requestTimeoutMs,
+      headers: this._headers,
+      body: data,
+      signal: signal,
+    });
+  }
+
   async generateSummarization(data: IGenerateDocStringRequest): Promise<IGenerateResponse | null> {
     return this._sendRequest(this._endpoints.summarize, 'POST', data);
   }
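`streamingRequest` is imported from `./request`, which is not part of this diff. Below is a minimal sketch of what it could look like, assuming a fetch-based implementation where the timeout and the caller's `AbortSignal` are chained into one controller; all details are guesses from the call site above, not the module's actual contents.

```typescript
// Hypothetical sketch of src/services/request.ts based on its call site.
interface IStreamingRequestOptions {
  method: 'POST';
  timeout: number;
  headers: Record<string, string>;
  body: unknown;
  signal?: AbortSignal;
}

export async function streamingRequest(
  url: string,
  onDataChunk: (chunk: string) => void,
  { method, timeout, headers, body, signal }: IStreamingRequestOptions
): Promise<void> {
  // Chain the request timeout and the caller's abort signal into one controller.
  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), timeout);
  signal?.addEventListener('abort', () => controller.abort());
  try {
    const response = await fetch(url, { method, headers, body: JSON.stringify(body), signal: controller.signal });
    const reader = response.body!.getReader();
    const decoder = new TextDecoder();
    // Forward each decoded chunk to the caller as it arrives.
    for (;;) {
      const { done, value } = await reader.read();
      if (done) break;
      onDataChunk(decoder.decode(value, { stream: true }));
    }
  } finally {
    clearTimeout(timer);
  }
}
```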
diff --git a/modules/openvino_code/src/settings/settings.service.ts b/modules/openvino_code/src/settings/settings.service.ts
index 0583119c5..62c38151a 100644
--- a/modules/openvino_code/src/settings/settings.service.ts
+++ b/modules/openvino_code/src/settings/settings.service.ts
@@ -30,6 +30,7 @@ class SettingsService implements IExtensionComponent {
   }
 
   updateSetting<K extends keyof CustomConfiguration>(key: K, value: CustomConfiguration[K]): void {
+    // FIXME/TODO: updating the model selection configuration doesn't work if the configuration is set in .vscode/settings.json
    void extensionState.config.update(key, value, ConfigurationTarget.Global);
  }
}
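One possible direction for the FIXME above: `WorkspaceConfiguration.inspect` reports which scope currently defines a value, so the update target can be chosen to match instead of always writing to `ConfigurationTarget.Global`. A sketch using standard vscode APIs; the wiring is illustrative, not a proposed final fix.

```typescript
// Sketch: write the setting back to the scope that currently defines it.
import { ConfigurationTarget, workspace } from 'vscode';

function updateSettingInOwningScope(key: string, value: unknown): Thenable<void> {
  const config = workspace.getConfiguration('openvinoCode');
  const info = config.inspect(key);
  // If .vscode/settings.json defines the key, write there; otherwise fall back to user settings.
  const target =
    info?.workspaceFolderValue !== undefined
      ? ConfigurationTarget.WorkspaceFolder
      : info?.workspaceValue !== undefined
        ? ConfigurationTarget.Workspace
        : ConfigurationTarget.Global;
  return config.update(key, value, target);
}
```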