-
Notifications
You must be signed in to change notification settings - Fork 3.4k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
.Net: Add PostgresVectorStore Memory connector. (#9324)
This PR adds a PostgresVectorStore and related classes to Microsoft.SemanticKernel.Connectors.Postgres. ### Motivation and Context As part of the move to having memory connectors implement the new Microsoft.Extensions.VectorData.IVectorStore architecture (see https://github.com/microsoft/semantic-kernel/blob/main/docs/decisions/0050-updated-vector-store-design.md), each memory connector needs to be updated with the new architecture. This PR tackles updating the existing Microsoft.SemanticKernel.Connectors.Postgres package to include this implementation. This will supersede the PostgresMemoryStore implementation. Some high-level comments about design: - PostgresVectorStore and PostgresVectorStoreRecordCollection get injected with an IPostgresVectorStoreDbClient. This abstracts the database communication and allows for unit tests to mock database interactions. - The PostgresVectorStoreDbClient is passed an NpgsqlDataSource by the user, which is used to manage connections to the database. The responsibility of connection pool lifecycle management is on the user. - The IPostgresVectorStoreDbClient is designed to accept and produce the storage model, which in this case is a Dictionary<string, object?>. This is the intermediate type that is mapped to by the IVectorStoreRecordMapper. - The PostgresVectorStoreDbClient also takes an IPostgresVectorStoreCollectionSqlBuilder, which generates SQL command information for interacting with the database. This abstracts the SQL queries related to each task, and allows for future expansion. This is particularly targeted at creating an AzureDBForPostgre vector store that will enable alternate vector implementations like [DiskANN](https://techcommunity.microsoft.com/t5/azure-database-for-postgresql/introducing-diskann-vector-index-in-azure-database-for/ba-p/4261192), while leveraging the same database client as the Postgres connector.
- The integration tests for the vector store utilize Docker.Net to bring up a pgvector/pgvector docker container, which the tests are run against. ### Contribution Checklist <!-- Before submitting this PR, please make sure: --> - [x] The code builds clean without any errors or warnings - [x] The PR follows the [SK Contribution Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md) and the [pre-submission formatting script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts) raises no violations - [x] All unit tests pass, and I have added new tests where possible - [x] I didn't break anyone 😄 --------- Co-authored-by: Rob Emanuele <[email protected]> Co-authored-by: Dmytro Struk <[email protected]>
- Loading branch information
1 parent
12a4d40
commit c7a371e
Showing
42 changed files
with
5,074 additions
and
72 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
67 changes: 67 additions & 0 deletions
67
dotnet/samples/Concepts/Memory/VectorStoreFixtures/VectorStorePostgresContainerFixture.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,67 @@ | ||
// Copyright (c) Microsoft. All rights reserved. | ||
|
||
using Docker.DotNet; | ||
using Npgsql; | ||
|
||
namespace Memory.VectorStoreFixtures; | ||
|
||
/// <summary>
/// Fixture that creates a Postgres container on demand before tests run and deletes it afterwards.
/// </summary>
/// <remarks>
/// The container is not started by <see cref="InitializeAsync"/>; callers must invoke
/// <see cref="ManualInitializeAsync"/> explicitly, so that merely constructing the fixture
/// does not require a Docker instance.
/// </remarks>
public class VectorStorePostgresContainerFixture : IAsyncLifetime
{
    /// <summary>Maximum number of times the readiness probe tries to reach the Postgres server.</summary>
    private const int MaxReadinessAttempts = 10;

    /// <summary>Pause between two failed readiness probes.</summary>
    private static readonly TimeSpan s_readinessRetryDelay = TimeSpan.FromMilliseconds(1000);

    private DockerClient? _dockerClient;
    private string? _postgresContainerId;

    /// <summary>
    /// Intentionally does nothing; the container is started by <see cref="ManualInitializeAsync"/>.
    /// </summary>
    /// <returns>An already completed task.</returns>
    public Task InitializeAsync() => Task.CompletedTask;

    /// <summary>
    /// Starts the Postgres container if this fixture has not started one yet, then waits until the
    /// server accepts connections and the <c>vector</c> extension has been created.
    /// </summary>
    /// <remarks>
    /// The readiness wait is best effort: if the server is still unreachable after
    /// <see cref="MaxReadinessAttempts"/> probes the method returns without throwing, and the
    /// first real use of the database will surface the failure.
    /// </remarks>
    /// <returns>A task that completes when the container is ready or the probes are exhausted.</returns>
    public async Task ManualInitializeAsync()
    {
        // A container id means a previous call already did the work.
        if (this._postgresContainerId is not null)
        {
            return;
        }

        // Connect to docker and start the docker container.
        using var dockerClientConfiguration = new DockerClientConfiguration();
        this._dockerClient = dockerClientConfiguration.CreateClient();
        this._postgresContainerId = await VectorStoreInfra.SetupPostgresContainerAsync(this._dockerClient);

        // Delay until the Postgres server is ready.
        var connectionString = TestConfiguration.Postgres.ConnectionString;
        for (var attempt = 0; attempt < MaxReadinessAttempts; attempt++)
        {
            try
            {
                NpgsqlDataSourceBuilder dataSourceBuilder = new(connectionString);
                dataSourceBuilder.UseVector();

                await using var dataSource = dataSourceBuilder.Build();
                await using var connection = await dataSource.OpenConnectionAsync();

                // Create extension vector if it doesn't exist.
                await using NpgsqlCommand command = new("CREATE EXTENSION IF NOT EXISTS vector", connection);
                await command.ExecuteNonQueryAsync();

                // The server answered, so stop probing instead of burning the remaining attempts.
                return;
            }
            catch (Exception)
            {
                // Any failure is treated as "server not ready yet": wait and probe again.
                await Task.Delay(s_readinessRetryDelay);
            }
        }
    }

    /// <summary>
    /// Deletes the Postgres container, if one was started, and releases the Docker client.
    /// </summary>
    /// <returns>A task that completes when cleanup has finished.</returns>
    public async Task DisposeAsync()
    {
        if (this._dockerClient is null)
        {
            return;
        }

        try
        {
            if (this._postgresContainerId is not null)
            {
                // Delete docker container.
                await VectorStoreInfra.DeleteContainerAsync(this._dockerClient, this._postgresContainerId);
                this._postgresContainerId = null;
            }
        }
        finally
        {
            // Release the client even when no container was created or deletion failed.
            this._dockerClient.Dispose();
            this._dockerClient = null;
        }
    }
}
85 changes: 85 additions & 0 deletions
85
dotnet/samples/Concepts/Memory/VectorStore_VectorSearch_MultiStore_Postgres.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,85 @@ | ||
// Copyright (c) Microsoft. All rights reserved. | ||
|
||
using Azure.Identity; | ||
using Memory.VectorStoreFixtures; | ||
using Microsoft.Extensions.DependencyInjection; | ||
using Microsoft.SemanticKernel; | ||
using Microsoft.SemanticKernel.Connectors.AzureOpenAI; | ||
using Microsoft.SemanticKernel.Connectors.Postgres; | ||
using Npgsql; | ||
|
||
namespace Memory; | ||
|
||
/// <summary>
/// Sample that runs the store-agnostic vector search code against a Postgres database.
/// The shared code lives in the <see cref="VectorStore_VectorSearch_MultiStore_Common"/> class;
/// it ingests data into the vector store and then searches over that data.
/// This sample belongs to a set of samples, each of which targets a different vector database.
///
/// The samples for the other databases are:
/// <para><see cref="VectorStore_VectorSearch_MultiStore_AzureAISearch"/></para>
/// <para><see cref="VectorStore_VectorSearch_MultiStore_Redis"/></para>
/// <para><see cref="VectorStore_VectorSearch_MultiStore_InMemory"/></para>
///
/// A local Docker instance must be running, because the associated fixture tries to start a Postgres container in it.
/// </summary>
public class VectorStore_VectorSearch_MultiStore_Postgres(ITestOutputHelper output, VectorStorePostgresContainerFixture PostgresFixture) : BaseTest(output), IClassFixture<VectorStorePostgresContainerFixture>
{
    /// <summary>
    /// Runs the common ingest-and-search flow with every dependency resolved from the kernel's DI container.
    /// </summary>
    [Fact]
    public async Task ExampleWithDIAsync()
    {
        // The kernel builder is used purely as a DI container here.
        var builder = Kernel.CreateBuilder();

        // Embedding generation service.
        builder.AddAzureOpenAITextEmbeddingGeneration(
            deploymentName: TestConfiguration.AzureOpenAIEmbeddings.DeploymentName,
            endpoint: TestConfiguration.AzureOpenAIEmbeddings.Endpoint,
            credential: new AzureCliCredential());

        // Bring up the Postgres docker container through the fixture, then register the Postgres VectorStore.
        await PostgresFixture.ManualInitializeAsync();
        var services = builder.Services;
        services.AddPostgresVectorStore(TestConfiguration.Postgres.ConnectionString);

        // Test output helper and the common processor.
        services.AddSingleton<ITestOutputHelper>(this.Output);
        services.AddTransient<VectorStore_VectorSearch_MultiStore_Common>();

        // Build the kernel and let the container construct the common processor.
        var kernel = builder.Build();
        var commonProcessor = kernel.GetRequiredService<VectorStore_VectorSearch_MultiStore_Common>();

        // The caller supplies the key generator so the common code stays independent of the
        // key type a particular vector store needs; Guid keys are used for this sample.
        await commonProcessor.IngestDataAndSearchAsync("skglossaryWithDI", () => Guid.NewGuid());
    }

    /// <summary>
    /// Runs the common ingest-and-search flow with every dependency constructed by hand.
    /// </summary>
    [Fact]
    public async Task ExampleWithoutDIAsync()
    {
        // Embedding generation service.
        var embeddingService = new AzureOpenAITextEmbeddingGenerationService(
            TestConfiguration.AzureOpenAIEmbeddings.DeploymentName,
            TestConfiguration.AzureOpenAIEmbeddings.Endpoint,
            new AzureCliCredential());

        // Bring up the Postgres docker container through the fixture.
        await PostgresFixture.ManualInitializeAsync();

        // Build a vector-enabled data source and wrap it in the Postgres VectorStore.
        var npgsqlBuilder = new NpgsqlDataSourceBuilder(TestConfiguration.Postgres.ConnectionString);
        npgsqlBuilder.UseVector();
        await using var dataSource = npgsqlBuilder.Build();
        var store = new PostgresVectorStore(dataSource);

        // The common processor works with any vector store.
        var commonProcessor = new VectorStore_VectorSearch_MultiStore_Common(store, embeddingService, this.Output);

        // The caller supplies the key generator so the common code stays independent of the
        // key type a particular vector store needs; Guid keys are used for this sample.
        await commonProcessor.IngestDataAndSearchAsync("skglossaryWithoutDI", () => Guid.NewGuid());
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.