Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

HuggingFace backend implementation #135

Merged
Merged
Show file tree
Hide file tree
Changes from 16 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 17 additions & 4 deletions dotnet/SK-dotnet.sln
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,12 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "nuget", "nuget", "{F4243136
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "KernelBuilder", "..\samples\dotnet\KernelBuilder\KernelBuilder.csproj", "{A52818AC-57FB-495F-818F-9E1E7BC5618C}"
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "backends", "backends", "{1B79D24F-6419-43B9-84E4-0A6EB46CC499}"
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Backends.HuggingFace", "src\Backends.HuggingFace\Backends.HuggingFace.csproj", "{A6300F56-0D8B-456C-9DA6-A5DA64B217C7}"
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Backends.HuggingFace.UnitTests", "src\Backends.HuggingFace.UnitTests\Backends.HuggingFace.UnitTests.csproj", "{EDDF7296-96D8-42D1-B568-32346665B180}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
Expand Down Expand Up @@ -110,6 +116,14 @@ Global
{A52818AC-57FB-495F-818F-9E1E7BC5618C}.Debug|Any CPU.Build.0 = Debug|Any CPU
{A52818AC-57FB-495F-818F-9E1E7BC5618C}.Release|Any CPU.ActiveCfg = Release|Any CPU
{A52818AC-57FB-495F-818F-9E1E7BC5618C}.Release|Any CPU.Build.0 = Release|Any CPU
{A6300F56-0D8B-456C-9DA6-A5DA64B217C7}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{A6300F56-0D8B-456C-9DA6-A5DA64B217C7}.Debug|Any CPU.Build.0 = Debug|Any CPU
{A6300F56-0D8B-456C-9DA6-A5DA64B217C7}.Release|Any CPU.ActiveCfg = Release|Any CPU
{A6300F56-0D8B-456C-9DA6-A5DA64B217C7}.Release|Any CPU.Build.0 = Release|Any CPU
{EDDF7296-96D8-42D1-B568-32346665B180}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{EDDF7296-96D8-42D1-B568-32346665B180}.Debug|Any CPU.Build.0 = Debug|Any CPU
{EDDF7296-96D8-42D1-B568-32346665B180}.Release|Any CPU.ActiveCfg = Release|Any CPU
{EDDF7296-96D8-42D1-B568-32346665B180}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
Expand All @@ -119,17 +133,16 @@ Global
{A05BF65E-085E-476C-B88A-9DA93F005416} = {FA3720F1-C99A-49B2-9577-A940257098BF}
{47C6F821-5103-431F-B3B8-A2868A68BB78} = {FA3720F1-C99A-49B2-9577-A940257098BF}
{3EB61E99-C39B-4620-9482-F8DA18E48525} = {FA3720F1-C99A-49B2-9577-A940257098BF}
{34A7F1EF-D243-4160-A413-D713FEABCD94} = {FA3720F1-C99A-49B2-9577-A940257098BF}
{E4B777A1-28E1-41BE-96AE-7F3EC61FD5D4} = {831DDCA2-7D2C-4C31-80DB-6BDB3E1F7AE0}
dmytrostruk marked this conversation as resolved.
Show resolved Hide resolved
{F94D1938-9DB7-4B24-9FF3-166DDFD96330} = {9ECD1AA0-75B3-4E25-B0B5-9F0945B64974}
{689A5041-BAE7-448F-9BDC-4672E96249AA} = {9ECD1AA0-75B3-4E25-B0B5-9F0945B64974}
{EEA87FBC-4ED5-458C-ABD3-BEAEEB535BAF} = {9ECD1AA0-75B3-4E25-B0B5-9F0945B64974}
{37E39C68-5A40-4E63-9D3C-0C66AD98DFCB} = {831DDCA2-7D2C-4C31-80DB-6BDB3E1F7AE0}
{E23E7270-F13D-4620-A115-AA6A8619EE5A} = {9ECD1AA0-75B3-4E25-B0B5-9F0945B64974}
{9ECD1AA0-75B3-4E25-B0B5-9F0945B64974} = {831DDCA2-7D2C-4C31-80DB-6BDB3E1F7AE0}
{107156B4-5A8B-45C7-97A2-4544D7FA19DE} = {9ECD1AA0-75B3-4E25-B0B5-9F0945B64974}
{F4243136-252A-4459-A7C4-EE8C056D6B0B} = {158A4E5E-AEE0-4D60-83C7-8E089B2D881D}
{A52818AC-57FB-495F-818F-9E1E7BC5618C} = {FA3720F1-C99A-49B2-9577-A940257098BF}
{1B79D24F-6419-43B9-84E4-0A6EB46CC499} = {831DDCA2-7D2C-4C31-80DB-6BDB3E1F7AE0}
{A6300F56-0D8B-456C-9DA6-A5DA64B217C7} = {1B79D24F-6419-43B9-84E4-0A6EB46CC499}
{EDDF7296-96D8-42D1-B568-32346665B180} = {1B79D24F-6419-43B9-84E4-0A6EB46CC499}
EndGlobalSection
GlobalSection(ExtensibilityGlobals) = postSolution
SolutionGuid = {FBDC56A3-86AD-4323-AA0F-201E59123B83}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
<Project Sdk="Microsoft.NET.Sdk">

<!-- Unit test project for the Backends.HuggingFace library; IsPackable=false keeps it out of NuGet packing. -->
<PropertyGroup>
<RootNamespace>SemanticKernel.Backends.HuggingFace.UnitTests</RootNamespace>
<AssemblyName>SemanticKernel.Backends.HuggingFace.UnitTests</AssemblyName>
<TargetFramework>net6.0</TargetFramework>
<IsPackable>false</IsPackable>
</PropertyGroup>

<!-- Test tooling: test SDK, Moq for mocking, xUnit plus its Visual Studio runner. -->
<!-- NOTE(review): no Version attributes are given here; presumably versions are managed centrally elsewhere in the repo. Confirm. -->
<ItemGroup>
<PackageReference Include="Microsoft.NET.Test.Sdk" />
<PackageReference Include="Moq" />
<PackageReference Include="xunit" />
<PackageReference Include="xunit.runner.visualstudio">
<!-- PrivateAssets=all: the runner is consumed by this project only and is not exposed to projects referencing it. -->
<IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
<PrivateAssets>all</PrivateAssets>
</PackageReference>
</ItemGroup>

<!-- The library under test. -->
<ItemGroup>
<ProjectReference Include="..\Backends.HuggingFace\Backends.HuggingFace.csproj" />
</ItemGroup>

<!-- Canned HTTP response bodies are copied to the output directory so they can be read from a relative TestData folder at test run time. -->
<ItemGroup>
<None Update="TestData\completion_test_response.json">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None>
<None Update="TestData\embeddings_test_response.json">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None>
</ItemGroup>

</Project>
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
// Copyright (c) Microsoft. All rights reserved.

using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Net;
using System.Net.Http;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.SemanticKernel.AI;
using Microsoft.SemanticKernel.Backends.HuggingFace;
using Moq;
using Moq.Protected;
using Xunit;

namespace SemanticKernel.Backends.HuggingFace.UnitTests;

/// <summary>
/// Tests for <see cref="HuggingFaceLocalBackend"/> that run against a mocked
/// <see cref="HttpClientHandler"/>, so no real HTTP endpoint is contacted.
/// </summary>
public class HuggingFaceLocalBackendTests : IDisposable
{
    private const string BaseUri = "http://localhost:5000";
    private const string Model = "gpt2";

    // Response object handed back by the mocked handler; its content is set per test
    // in CreateBackend and it is released in Dispose.
    private readonly HttpResponseMessage _response = new() { StatusCode = HttpStatusCode.OK };

    /// <summary>
    /// Checks that <see cref="HuggingFaceLocalBackend.CompleteAsync(string, CompleteRequestSettings)"/>
    /// yields the completion text contained in the canned response.
    /// </summary>
    [Fact]
    public async Task ItReturnsCompletionCorrectlyAsync()
    {
        // Arrange
        var settings = new CompleteRequestSettings();
        var cannedJson = GetTestResponse("completion_test_response.json");

        using var sut = this.CreateBackend(cannedJson);

        // Act
        var actual = await sut.CompleteAsync("This is test", settings);

        // Assert
        Assert.Equal("This is test completion response", actual);
    }

    /// <summary>
    /// Checks that <see cref="HuggingFaceLocalBackend.GenerateEmbeddingsAsync(IList{string})"/>
    /// yields the embeddings contained in the canned response.
    /// </summary>
    [Fact]
    public async Task ItReturnsEmbeddingsCorrectlyAsync()
    {
        // Arrange
        // The canned response carries one embedding of eight values, independent of
        // how many input strings are submitted.
        const int embeddingCount = 1;
        const int vectorLength = 8;
        var inputs = new List<string> { "test_string_1", "test_string_2", "test_string_3" };
        var cannedJson = GetTestResponse("embeddings_test_response.json");

        using var sut = this.CreateBackend(cannedJson);

        // Act
        var actual = await sut.GenerateEmbeddingsAsync(inputs);

        // Assert
        Assert.NotNull(actual);
        Assert.Equal(embeddingCount, actual.Count);
        Assert.Equal(vectorLength, actual.First().Count);
    }

    /// <summary>
    /// Releases the shared response message.
    /// </summary>
    public void Dispose()
    {
        this.Dispose(true);
        GC.SuppressFinalize(this);
    }

    /// <summary>
    /// Disposes the shared response message when called from <see cref="Dispose()"/>.
    /// </summary>
    /// <param name="disposing">True when invoked through <see cref="Dispose()"/>.</param>
    protected virtual void Dispose(bool disposing)
    {
        if (!disposing)
        {
            return;
        }

        this._response.Dispose();
    }

    /// <summary>
    /// Loads a canned response body from the TestData folder.
    /// </summary>
    /// <param name="fileName">File name inside the TestData folder.</param>
    private static string GetTestResponse(string fileName) => File.ReadAllText($"./TestData/{fileName}");

    /// <summary>
    /// Builds a <see cref="HuggingFaceLocalBackend"/> whose <see cref="HttpClientHandler"/>
    /// answers every request with the given body.
    /// </summary>
    /// <param name="testResponse">Body the mocked <see cref="HttpClientHandler"/> returns.</param>
    private HuggingFaceLocalBackend CreateBackend(string testResponse)
    {
        this._response.Content = new StringContent(testResponse);

        var handlerMock = new Mock<HttpClientHandler>();

        // SendAsync is protected on the handler, hence the string-based Protected() setup.
        handlerMock
            .Protected()
            .Setup<Task<HttpResponseMessage>>(
                "SendAsync",
                ItExpr.IsAny<HttpRequestMessage>(),
                ItExpr.IsAny<CancellationToken>())
            .ReturnsAsync(this._response);

        return new HuggingFaceLocalBackend(BaseUri, Model, handlerMock.Object);
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
{
"choices": [
{
"finish_reason": "test",
"index": 0,
"logprobs": "",
"text": "This is test completion response"
}
],
"created": "Tue, 21 Mar 2023 11:18:04 GMT",
"id": "",
"model": "gpt2",
"object": "text_completion",
"usage": {
"completion_tokens": 32,
"prompt_tokens": 3,
"total_tokens": 35
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
{
"data": [
{
"embedding": [
-0.08541165292263031,
0.08639130741357803,
-0.12805694341659546,
-0.2877824902534485,
0.2114177942276001,
-0.29374566674232483,
-0.10496602207422256,
0.009402364492416382
],
"index": 0,
"object": "embedding"
}
],
"object": "list",
"usage": {
"prompt_tokens": 15,
"total_tokens": 15
}
}
13 changes: 13 additions & 0 deletions dotnet/src/Backends.HuggingFace/Backends.HuggingFace.csproj
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
<Project Sdk="Microsoft.NET.Sdk">

<!-- Library project producing the Microsoft.SemanticKernel.Backends.HuggingFace assembly, targeting .NET Standard 2.1. -->
<PropertyGroup>
<AssemblyName>Microsoft.SemanticKernel.Backends.HuggingFace</AssemblyName>
<RootNamespace>Microsoft.SemanticKernel.Backends.HuggingFace</RootNamespace>
<TargetFramework>netstandard2.1</TargetFramework>
</PropertyGroup>

<!-- Depends on the SemanticKernel project in the sibling folder. -->
<ItemGroup>
<ProjectReference Include="..\SemanticKernel\SemanticKernel.csproj" />
</ItemGroup>

</Project>
25 changes: 25 additions & 0 deletions dotnet/src/Backends.HuggingFace/HttpSchema/CompletionRequest.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
// Copyright (c) Microsoft. All rights reserved.

using System;
using System.Text.Json.Serialization;

namespace Microsoft.SemanticKernel.Backends.HuggingFace.HttpSchema;

/// <summary>
/// HTTP schema to perform completion request.
/// Serialized to a JSON object with <c>prompt</c> and <c>model</c> fields.
/// </summary>
[Serializable]
public sealed class CompletionRequest
{
    /// <summary>
    /// Prompt to complete. Written to the <c>prompt</c> JSON field.
    /// </summary>
    [JsonPropertyName("prompt")]
    public string? Prompt { get; set; }

    /// <summary>
    /// Model to use for completion. Written to the <c>model</c> JSON field.
    /// </summary>
    [JsonPropertyName("model")]
    public string? Model { get; set; }
}
30 changes: 30 additions & 0 deletions dotnet/src/Backends.HuggingFace/HttpSchema/CompletionResponse.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
// Copyright (c) Microsoft. All rights reserved.

using System.Collections.Generic;
using System.Text.Json.Serialization;

namespace Microsoft.SemanticKernel.Backends.HuggingFace.HttpSchema;

/// <summary>
/// Shape of the JSON body returned for a completion request.
/// </summary>
public sealed class CompletionResponse
{
    /// <summary>
    /// Completion candidates, read from the <c>choices</c> JSON array.
    /// </summary>
    [JsonPropertyName("choices")]
    public IList<Choice>? Choices { get; set; }

    /// <summary>
    /// One completion candidate inside the <c>choices</c> array.
    /// </summary>
    public sealed class Choice
    {
        /// <summary>
        /// Text of this completion, read from the <c>text</c> JSON field.
        /// </summary>
        [JsonPropertyName("text")]
        public string? Text { get; set; }
    }
}
26 changes: 26 additions & 0 deletions dotnet/src/Backends.HuggingFace/HttpSchema/EmbeddingRequest.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
// Copyright (c) Microsoft. All rights reserved.

using System;
using System.Collections.Generic;
using System.Text.Json.Serialization;

namespace Microsoft.SemanticKernel.Backends.HuggingFace.HttpSchema;

/// <summary>
/// Shape of the JSON body sent to request embeddings.
/// </summary>
[Serializable]
public sealed class EmbeddingRequest
{
    /// <summary>
    /// Strings to embed, written to the <c>input</c> JSON array.
    /// </summary>
    [JsonPropertyName("input")]
    public IList<string>? Input { get; set; }

    /// <summary>
    /// Name of the embedding model, written to the <c>model</c> JSON field.
    /// </summary>
    [JsonPropertyName("model")]
    public string? Model { get; set; }
}
27 changes: 27 additions & 0 deletions dotnet/src/Backends.HuggingFace/HttpSchema/EmbeddingResponse.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
// Copyright (c) Microsoft. All rights reserved.

using System.Collections.Generic;
using System.Text.Json.Serialization;

namespace Microsoft.SemanticKernel.Backends.HuggingFace.HttpSchema;

/// <summary>
/// Shape of the JSON body returned for an embedding request.
/// </summary>
public sealed class EmbeddingResponse
{
    /// <summary>
    /// Returned embeddings, read from the <c>data</c> JSON array.
    /// </summary>
    [JsonPropertyName("data")]
    public IList<EmbeddingVector>? Embeddings { get; set; }

    /// <summary>
    /// One entry of the <c>data</c> array, wrapping a single embedding.
    /// </summary>
    public sealed class EmbeddingVector
    {
        /// <summary>
        /// Embedding values, read from the <c>embedding</c> JSON array.
        /// </summary>
        [JsonPropertyName("embedding")]
        public IList<float>? Embedding { get; set; }
    }
}
Loading