Skip to content

Commit

Permalink
.Net: Add VectorStoreTextSearch concepts and unit tests (#8891)
Browse files Browse the repository at this point in the history
### Motivation and Context

1. Add `VectorizedSearchWrapper` to help test `VectorStoreTextSearch`
2. Add unit tests for `VectorStoreTextSearch`

### Description

<!-- Describe your changes, the overall approach, the underlying design.
These notes will help understanding how your code works. Thanks! -->

### Contribution Checklist

<!-- Before submitting this PR, please make sure: -->

- [ ] The code builds clean without any errors or warnings
- [ ] The PR follows the [SK Contribution
Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md)
and the [pre-submission formatting
script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts)
raises no violations
- [ ] All unit tests pass, and I have added new tests where possible
- [ ] I didn't break anyone 😄
  • Loading branch information
markwallace-microsoft authored Sep 23, 2024
1 parent 952d679 commit 6c4aa5d
Show file tree
Hide file tree
Showing 6 changed files with 515 additions and 6 deletions.
1 change: 1 addition & 0 deletions .github/_typos.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ extend-exclude = [
"*response.json",
"test_content.txt",
"google_what_is_the_semantic_kernel.json",
"what-is-semantic-kernel.json",
"serializedChatHistoryV1_15_1.json",
"MultipleFunctionsVsParameters.cs"
]
Expand Down
49 changes: 46 additions & 3 deletions dotnet/samples/Concepts/Memory/VectorStoreExtensions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,18 @@ namespace Memory;
internal static class VectorStoreExtensions
{
/// <summary>
/// Delegate to create a record.
/// Delegate to create a record from a string.
/// </summary>
/// <typeparam name="TKey">Type of the record key.</typeparam>
/// <typeparam name="TRecord">Type of the record.</typeparam>
internal delegate TRecord CreateRecord<TKey, TRecord>(string text, ReadOnlyMemory<float> vector) where TKey : notnull where TRecord : class;
internal delegate TRecord CreateRecordFromString<TKey, TRecord>(string text, ReadOnlyMemory<float> vector) where TKey : notnull where TRecord : class;

/// <summary>
/// Delegate to create a record from a <see cref="TextSearchResult"/>.
/// </summary>
/// <typeparam name="TKey">Type of the record key.</typeparam>
/// <typeparam name="TRecord">Type of the record.</typeparam>
/// <param name="searchResult">The search result the record is created from.</param>
/// <param name="vector">The embedding vector to associate with the record.</param>
/// <returns>The created record.</returns>
internal delegate TRecord CreateRecordFromTextSearchResult<TKey, TRecord>(TextSearchResult searchResult, ReadOnlyMemory<float> vector) where TKey : notnull where TRecord : class;

/// <summary>
/// Create a <see cref="IVectorStoreRecordCollection{TKey, TRecord}"/> from a list of strings by:
Expand All @@ -35,7 +42,7 @@ internal static async Task<IVectorStoreRecordCollection<TKey, TRecord>> CreateCo
string collectionName,
string[] entries,
ITextEmbeddingGenerationService embeddingGenerationService,
CreateRecord<TKey, TRecord> createRecord)
CreateRecordFromString<TKey, TRecord> createRecord)
where TKey : notnull
where TRecord : class
{
Expand All @@ -53,4 +60,40 @@ internal static async Task<IVectorStoreRecordCollection<TKey, TRecord>> CreateCo

return collection;
}

/// <summary>
/// Create a <see cref="IVectorStoreRecordCollection{TKey, TRecord}"/> from a list of <see cref="TextSearchResult"/>s by:
/// 1. Getting the named collection from the vector store and creating it if it does not exist.
/// 2. Generating an embedding for the <see cref="TextSearchResult.Value"/> of each search result.
/// 3. Creating a record with a valid key for each search result and its embedding.
/// 4. Upserting the records into the collection.
/// </summary>
/// <typeparam name="TKey">Type of the record key.</typeparam>
/// <typeparam name="TRecord">Type of the record.</typeparam>
/// <param name="vectorStore">Instance of <see cref="IVectorStore"/> used to create the collection.</param>
/// <param name="collectionName">The collection name.</param>
/// <param name="searchResults">A list of <see cref="TextSearchResult" />s.</param>
/// <param name="embeddingGenerationService">A text embedding generation service.</param>
/// <param name="createRecord">A delegate which can create a record with a valid key for each search result and its embedding.</param>
/// <returns>The collection the records were upserted into.</returns>
internal static async Task<IVectorStoreRecordCollection<TKey, TRecord>> CreateCollectionFromTextSearchResultsAsync<TKey, TRecord>(
    this IVectorStore vectorStore,
    string collectionName,
    IList<TextSearchResult> searchResults,
    ITextEmbeddingGenerationService embeddingGenerationService,
    CreateRecordFromTextSearchResult<TKey, TRecord> createRecord)
    where TKey : notnull
    where TRecord : class
{
    // Get and create collection if it doesn't exist.
    var collection = vectorStore.GetCollection<TKey, TRecord>(collectionName);
    await collection.CreateCollectionIfNotExistsAsync().ConfigureAwait(false);

    // Embedding generation and upsert are already asynchronous, so an async lambda per
    // search result lets them overlap without wrapping each one in Task.Run.
    var tasks = searchResults.Select(async searchResult =>
    {
        // The embedding is generated from the result's Value; callers are expected to supply results with a non-null Value.
        var embedding = await embeddingGenerationService.GenerateEmbeddingAsync(searchResult.Value!).ConfigureAwait(false);
        var record = createRecord(searchResult, embedding);
        await collection.UpsertAsync(record).ConfigureAwait(false);
    });
    await Task.WhenAll(tasks).ConfigureAwait(false);

    return collection;
}
}
59 changes: 57 additions & 2 deletions dotnet/samples/Concepts/Memory/VolatileVectorStore_LoadData.cs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
// Copyright (c) Microsoft. All rights reserved.

using System.Text.Json;
using Microsoft.SemanticKernel.Connectors.OpenAI;
using Microsoft.SemanticKernel.Data;
using Microsoft.SemanticKernel.Embeddings;
Expand All @@ -14,12 +15,17 @@ namespace Memory;
public class VolatileVectorStore_LoadData(ITestOutputHelper output) : BaseTest(output)
{
[Fact]
public async Task LoadRecordCollectionAndSearchAsync()
public async Task LoadStringListAndSearchAsync()
{
// Create a logging handler to output HTTP requests and responses
var handler = new LoggingHandler(new HttpClientHandler(), this.Output);
var httpClient = new HttpClient(handler);

// Create an embedding generation service.
var embeddingGenerationService = new OpenAITextEmbeddingGenerationService(
modelId: TestConfiguration.OpenAI.EmbeddingModelId,
apiKey: TestConfiguration.OpenAI.ApiKey);
apiKey: TestConfiguration.OpenAI.ApiKey,
httpClient: httpClient);

// Construct a volatile vector store.
var vectorStore = new VolatileVectorStore();
Expand Down Expand Up @@ -71,6 +77,49 @@ static DataModel CreateRecord(string text, ReadOnlyMemory<float> embedding)
}
}

/// <summary>
/// Loads a list of <see cref="TextSearchResult"/>s from the "what-is-semantic-kernel.json" resource into a
/// volatile vector store collection and then runs a vector search against that collection.
/// </summary>
[Fact]
public async Task LoadTextSearchResultsAndSearchAsync()
{
    // Create an embedding generation service.
    var embeddingGenerationService = new OpenAITextEmbeddingGenerationService(
        modelId: TestConfiguration.OpenAI.EmbeddingModelId,
        apiKey: TestConfiguration.OpenAI.ApiKey);

    // Construct a volatile vector store.
    var vectorStore = new VolatileVectorStore();
    var collectionName = "records";

    // Read a list of text search results from a JSON resource, to load into a new record collection.
    var searchResultsJson = EmbeddedResource.Read("what-is-semantic-kernel.json");
    var searchResults = JsonSerializer.Deserialize<List<TextSearchResult>>(searchResultsJson!)
        ?? throw new InvalidOperationException("Failed to deserialize search results from what-is-semantic-kernel.json.");

    // Delegate which will create a record from a search result and its embedding.
    static DataModel CreateRecord(TextSearchResult searchResult, ReadOnlyMemory<float> embedding)
    {
        return new()
        {
            Key = Guid.NewGuid(),
            Title = searchResult.Name,
            Text = searchResult.Value ?? string.Empty,
            Link = searchResult.Link,
            Embedding = embedding
        };
    }

    // Create a record collection from the list of text search results using the provided delegate.
    var vectorSearch = await vectorStore.CreateCollectionFromTextSearchResultsAsync<Guid, DataModel>(
        collectionName, searchResults, embeddingGenerationService, CreateRecord);

    // Search the collection using a vector search.
    var searchString = "What is the Semantic Kernel?";
    var searchVector = await embeddingGenerationService.GenerateEmbeddingAsync(searchString);
    var topResults = await vectorSearch.VectorizedSearchAsync(searchVector, new() { Limit = 1 }).ToListAsync();

    Console.WriteLine($"Search string: {searchString}");
    Console.WriteLine($"Result: {topResults.First().Record.Text}");
    Console.WriteLine();
}

/// <summary>
/// Sample model class that represents a record entry.
/// </summary>
Expand All @@ -83,9 +132,15 @@ private sealed class DataModel
[VectorStoreRecordKey]
public Guid Key { get; init; }

[VectorStoreRecordData]
public string? Title { get; init; }

[VectorStoreRecordData]
public string Text { get; init; }

[VectorStoreRecordData]
public string? Link { get; init; }

[VectorStoreRecordVector(1536)]
public ReadOnlyMemory<float> Embedding { get; init; }
}
Expand Down
102 changes: 102 additions & 0 deletions dotnet/samples/Concepts/Resources/what-is-semantic-kernel.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
[
{
"Name": "Introduction to Semantic Kernel | Microsoft Learn",
"Link": "https://learn.microsoft.com/en-us/semantic-kernel/overview/",
"Value": "Semantic Kernel is a lightweight, open-source development kit that lets you easily build AI agents and integrate the latest AI models into your C#, Python, or Java codebase. It serves as an efficient middleware that enables rapid delivery of enterprise-grade solutions."
},
{
"Name": "Semantic Kernel: What It Is and Why It Matters",
"Link": "https://techcommunity.microsoft.com/t5/microsoft-developer-community/semantic-kernel-what-it-is-and-why-it-matters/ba-p/3877022",
"Value": "Semantic Kernel is a new AI SDK, and a simple and yet powerful programming model that lets you add large language capabilities to your app in just a matter of minutes. It uses natural language prompting to create and execute semantic kernel AI tasks across multiple languages and platforms."
},
{
"Name": "How to quickly start with Semantic Kernel | Microsoft Learn",
"Link": "https://learn.microsoft.com/en-us/semantic-kernel/get-started/quick-start-guide",
"Value": "In this guide, you learned how to quickly get started with Semantic Kernel by building a simple AI agent that can interact with an AI service and run your code. To see more examples and learn how to build more complex AI agents, check out our in-depth samples."
},
{
"Name": "Understanding the kernel in Semantic Kernel | Microsoft Learn",
"Link": "https://learn.microsoft.com/en-us/semantic-kernel/concepts/kernel",
"Value": "The kernel is the central component of Semantic Kernel. At its simplest, the kernel is a Dependency Injection container that manages all of the services and plugins necessary to run your AI application."
},
{
"Name": "Hello, Semantic Kernel! | Semantic Kernel - devblogs.microsoft.com",
"Link": "https://devblogs.microsoft.com/semantic-kernel/hello-world/",
"Value": "Semantic Kernel (SK) is a lightweight SDK that lets you mix conventional programming languages, like C# and Python, with the latest in Large Language Model (LLM) AI \u201Cprompts\u201D with prompt templating, chaining, and planning capabilities."
},
{
"Name": "GitHub - microsoft/semantic-kernel: Integrate cutting-edge LLM ...",
"Link": "https://github.com/microsoft/semantic-kernel",
"Value": "The Semantic Kernel extension for Visual Studio Code makes it easy to design and test semantic functions. The extension provides an interface for designing semantic functions and allows you to test them with the push of a button with your existing models and data."
},
{
"Name": "Semantic Kernel: A bridge between large language models and ... - InfoWorld",
"Link": "https://www.infoworld.com/article/2338321/semantic-kernel-a-bridge-between-large-language-models-and-your-code.html",
"Value": "Semantic Kernel distinguishes between semantic functions, templated prompts, and native functions, i.e. the native computer code that processes data for use in the LLM\u2019s semantic functions."
},
{
"Name": "Architecting AI Apps with Semantic Kernel | Semantic Kernel",
"Link": "https://devblogs.microsoft.com/semantic-kernel/architecting-ai-apps-with-semantic-kernel/",
"Value": "With Semantic Kernel, you can easily build agents that can call your existing code. This power lets you automate your business processes with models from OpenAI, Azure OpenAI, Hugging Face, and more! We often get asked though, \u201CHow do I architect my solution?\u201D and \u201CHow does it actually work?\u201D"
},
{
"Name": "semantic-kernel/README.md at main \u00B7 microsoft/semantic-kernel - GitHub",
"Link": "https://github.com/microsoft/semantic-kernel/blob/main/README.md",
"Value": "Semantic Kernel is an SDK that integrates Large Language Models (LLMs) like OpenAI, Azure OpenAI, and Hugging Face with conventional programming languages like C#, Python, and Java. Semantic Kernel achieves this by allowing you to define plugins that can be chained together in just a few lines of code."
},
{
"Name": "Introducing Semantic Kernel for Java | Semantic Kernel",
"Link": "https://devblogs.microsoft.com/semantic-kernel/introducing-semantic-kernel-for-java/",
"Value": "Semantic Kernel for Java is an open source library that empowers developers to harness the power of AI while coding in Java. It is compatible with Java 8 and above, ensuring flexibility and accessibility to a wide range of Java developers."
},
{
"Name": "Semantic Kernel: The New Way to Create Artificial Intelligence ... - Medium",
"Link": "https://medium.com/globant/semantic-kernel-the-new-way-to-create-artificial-intelligence-applications-7959d5fc90ca",
"Value": "Semantic Kernel enables developers to easily blend cutting-edge AI with native code, opening up a world of new possibilities for AI applications. This article could go on to discuss..."
},
{
"Name": "How to Get Started using Semantic Kernel .NET",
"Link": "https://devblogs.microsoft.com/semantic-kernel/how-to-get-started-using-semantic-kernel-net/",
"Value": "Prompt Templates. Chat Prompting. Filtering. Dependency Injection. A Glimpse into the Gettings Started Steps: In the guide below we\u2019ll start from scratch and navigate with you through each of the example steps, clarifying the code, details and running them in real time."
},
{
"Name": "What is Semantic Kernel? - Introducing Semantic Kernel: Building AI ...",
"Link": "https://www.linkedin.com/learning/introducing-semantic-kernel-building-ai-based-apps/what-is-semantic-kernel",
"Value": "After watching this video, you\u0027ll be able to explain what Semantic Kernel is and how it changes the way that developers are currently working."
},
{
"Name": "Semantic Kernel - AI Hub",
"Link": "https://azure.github.io/aihub/docs/concepts/semantic-kernel/",
"Value": "Semantic Kernel is an open-source SDK that lets you easily combine AI services like OpenAI, Azure OpenAI, and Hugging Face with conventional programming languages like C# and Python. By doing so, you can create AI apps that combine the best of both worlds. Microsoft powers its Copilot system with a stack of AI models and plugins."
},
{
"Name": "Semantic Kernel documentation | Microsoft Learn",
"Link": "https://learn.microsoft.com/en-us/semantic-kernel/",
"Value": "Semantic Kernel documentation. Learn to build robust, future-proof AI solutions that evolve with technological advancements."
},
{
"Name": "Unlock the Potential of AI in Your Apps with Semantic Kernel: A ...",
"Link": "https://techcommunity.microsoft.com/t5/educator-developer-blog/unlock-the-potential-of-ai-in-your-apps-with-semantic-kernel-a/ba-p/3773847",
"Value": "Semantic Kernel (SK) is an innovative and lightweight Software Development Kit (SDK) designed to integrate Artificial Intelligence (AI) Large Language Models (LLMs) with conventional programming languages."
},
{
"Name": "Microsoft Semantic Kernel and AutoGen: Open Source Frameworks for AI ...",
"Link": "https://techcommunity.microsoft.com/t5/educator-developer-blog/microsoft-semantic-kernel-and-autogen-open-source-frameworks-for/ba-p/4051305",
"Value": "Semantic Kernel is an open-source Software Development Kit (SDK) that allows developers to build AI agents that can call existing code. It\u0027s designed to work with models from various AI providers like OpenAI, Azure OpenAI, and Hugging Face."
},
{
"Name": "Getting started with Microsoft Semantic Kernel - C# Corner",
"Link": "https://www.c-sharpcorner.com/article/getting-started-with-microsoft-semantic-kernel/",
"Value": "Microsoft Semantic Kernel is an Open Source lightweight SDK for consuming Large Language Models (LLMs) in normal programming languages like C# and Python. We can use OpenAI, Azure OpenAI, and Hugging Face language models in our existing apps to extend our app\u0027s capabilities without needing to train or fine-tune a model from scratch."
},
{
"Name": "Zero To AI Hero Pt 3: Agents in Semantic Kernel - DZone",
"Link": "https://dzone.com/articles/zero-to-ai-hero-part-three-power-of-agents",
"Value": "Agents in Semantic Kernel are not just tools; they\u2019re dynamic assistants that combine the power of AI, plugins, and orchestrated plans to solve complex problems. By understanding their building ..."
},
{
"Name": "Semantic Kernel overview for .NET - .NET | Microsoft Learn",
"Link": "https://learn.microsoft.com/en-us/dotnet/ai/semantic-kernel-dotnet-overview",
"Value": "Semantic Kernel is a powerful and recommended choice for working with AI in .NET applications. In the sections ahead, you learn: How to add semantic kernel to your project. Semantic Kernel core concepts. The sections ahead serve as an introductory overview of Semantic Kernel specifically in the context of .NET."
}
]
30 changes: 29 additions & 1 deletion dotnet/samples/Concepts/Search/VectorStore_TextSearch.cs
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
// Copyright (c) Microsoft. All rights reserved.
using System.Runtime.CompilerServices;
using Microsoft.SemanticKernel.Connectors.OpenAI;
using Microsoft.SemanticKernel.Data;
using Microsoft.SemanticKernel.Embeddings;

namespace Search;

/// <summary>
/// This example shows how to create and use a <see cref="VectorStoreTextSearch{TRecord}"/>.
/// This example shows how to create and use a <see cref="VectorStoreTextSearch{TRecord}"/> instance.
/// </summary>
public class VectorStore_TextSearch(ITestOutputHelper output) : BaseTest(output)
{
Expand Down Expand Up @@ -51,7 +52,16 @@ static DataModel CreateRecord(string text, ReadOnlyMemory<float> embedding)
var stringMapper = new DataModelTextSearchStringMapper();
var resultMapper = new DataModelTextSearchResultMapper();
var textSearch = new VectorStoreTextSearch<DataModel>(vectorizedSearch, textEmbeddingGeneration, stringMapper, resultMapper);
await ExecuteSearchesAsync(textSearch);

// Create a text search instance using a vectorized search wrapper around the volatile vector store.
IVectorizableTextSearch<DataModel> vectorizableTextSearch = new VectorizedSearchWrapper<DataModel>(vectorizedSearch, textEmbeddingGeneration);
textSearch = new VectorStoreTextSearch<DataModel>(vectorizableTextSearch, stringMapper, resultMapper);
await ExecuteSearchesAsync(textSearch);
}

private async Task ExecuteSearchesAsync(VectorStoreTextSearch<DataModel> textSearch)
{
var query = "What is the Semantic Kernel?";

// Search and return results as a string items
Expand Down Expand Up @@ -161,6 +171,24 @@ internal static async Task<IVectorStoreRecordCollection<TKey, TRecord>> CreateCo
return collection;
}

/// <summary>
/// Decorator for a <see cref="IVectorizedSearch{TRecord}"/> that generates embeddings for text search queries.
/// </summary>
/// <typeparam name="TRecord">Type of the records returned by the search.</typeparam>
/// <param name="vectorizedSearch">The vectorized search that is queried with the generated embedding.</param>
/// <param name="textEmbeddingGeneration">The service used to generate an embedding for the search text.</param>
private sealed class VectorizedSearchWrapper<TRecord>(IVectorizedSearch<TRecord> vectorizedSearch, ITextEmbeddingGenerationService textEmbeddingGeneration) : IVectorizableTextSearch<TRecord>
    where TRecord : class
{
    /// <inheritdoc/>
    public async IAsyncEnumerable<VectorSearchResult<TRecord>> VectorizableTextSearchAsync(string searchText, VectorSearchOptions? options = null, [EnumeratorCancellation] CancellationToken cancellationToken = default)
    {
        // Convert the search text into an embedding before delegating to the wrapped vectorized search.
        var vectorizedQuery = await textEmbeddingGeneration.GenerateEmbeddingAsync(searchText, cancellationToken: cancellationToken).ConfigureAwait(false);

        // Stream the wrapped search's results through unchanged.
        await foreach (var result in vectorizedSearch.VectorizedSearchAsync(vectorizedQuery, options, cancellationToken).ConfigureAwait(false))
        {
            yield return result;
        }
    }
}

/// <summary>
/// Sample model class that represents a record entry.
/// </summary>
Expand Down
Loading

0 comments on commit 6c4aa5d

Please sign in to comment.