From d403a02d27ff3312514f637ea239c1fa8092c232 Mon Sep 17 00:00:00 2001 From: JohanYman Date: Tue, 7 Nov 2023 15:42:28 +0100 Subject: [PATCH] We encountered some performance issues and implemented the following fix. We updated the TokenUtils class within the Skills Web API to address these issues. The cl100k_base tokenizer used for encoding text is now created once and stored in a static field instead of being created on every call, which significantly improves the efficiency of the TokenCount method. As a result of this change, the performance of the token counting operations has been enhanced, ensuring faster responses for users; the token counts themselves are unchanged. --- webapi/Skills/Utils/TokenUtils.cs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/webapi/Skills/Utils/TokenUtils.cs b/webapi/Skills/Utils/TokenUtils.cs index b3478cc46..dbf890253 100644 --- a/webapi/Skills/Utils/TokenUtils.cs +++ b/webapi/Skills/Utils/TokenUtils.cs @@ -17,6 +17,8 @@ namespace CopilotChat.WebApi.Skills.Utils; /// public static class TokenUtils { + private static SharpToken.GptEncoding tokenizer = SharpToken.GptEncoding.GetEncoding("cl100k_base"); + /// /// Semantic dependencies of ChatSkill. /// If you add a new semantic dependency, please add it here. @@ -98,7 +100,6 @@ internal static void GetFunctionTokenUsage(SKContext result, SKContext chatConte /// The string to calculate the number of tokens in. internal static int TokenCount(string text) { - var tokenizer = SharpToken.GptEncoding.GetEncoding("cl100k_base"); var tokens = tokenizer.Encode(text); return tokens.Count; }