Skip to content

Commit

Permalink
feat: Added filtering ability to supabase (langchain-ai#905)
Browse files Browse the repository at this point in the history
* Added filtering ability to supabase

* Supabase metadata filter polish: small fix, adds integration test, update docs

* Skip Supabase integration test reliant on outside setup

* Use FakeEmbeddings in unit test

* Update docs

---------

Co-authored-by: Jacob Lee <[email protected]>
  • Loading branch information
mishkinf and Jacob Lee authored May 5, 2023
1 parent 478fcd9 commit f6f06e0
Show file tree
Hide file tree
Showing 6 changed files with 154 additions and 4 deletions.
4 changes: 3 additions & 1 deletion docs/docs/modules/indexes/retrievers/supabase-hybrid.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,8 @@ create table documents (
-- Create a function to similarity search for documents
create function match_documents (
query_embedding vector(1536),
match_count int
match_count int,
filter jsonb DEFAULT '{}'
) returns table (
id bigint,
content text,
Expand All @@ -47,6 +48,7 @@ begin
metadata,
1 - (documents.embedding <=> query_embedding) as similarity
from documents
where metadata @> filter
order by documents.embedding <=> query_embedding
limit match_count;
end;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,8 @@ create table documents (
-- Create a function to search for documents
create function match_documents (
query_embedding vector(1536),
match_count int
match_count int,
filter jsonb DEFAULT '{}'
) returns table (
id bigint,
content text,
Expand All @@ -47,6 +48,7 @@ begin
metadata,
1 - (documents.embedding <=> query_embedding) as similarity
from documents
where metadata @> filter
order by documents.embedding <=> query_embedding
limit match_count;
end;
Expand All @@ -57,5 +59,18 @@ $$;

import CodeBlock from "@theme/CodeBlock";
import Example from "@examples/indexes/vector_stores/supabase.ts";
import MetadataFilterExample from "@examples/indexes/vector_stores/supabase_with_metadata_filter.ts";

### Standard Usage

The below example shows how to perform a basic similarity search with Supabase:

<CodeBlock language="typescript">{Example}</CodeBlock>

### Metadata Filtering

Given the above `match_documents` Postgres function, you can also pass a filter parameter to return only documents with a specific metadata field value.

**Note:** If you've previously been using `SupabaseVectorStore`, you may need to drop and recreate the `match_documents` function per the updated SQL above to use this functionality.

<CodeBlock language="typescript">{MetadataFilterExample}</CodeBlock>
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
import { SupabaseVectorStore } from "langchain/vectorstores/supabase";
import { OpenAIEmbeddings } from "langchain/embeddings/openai";
import { createClient } from "@supabase/supabase-js";

// First, follow set-up instructions at
// https://js.langchain.com/docs/modules/indexes/vector_stores/integrations/supabase

/**
 * Reads a required environment variable, throwing when it is unset or empty.
 */
const requireEnv = (name: string): string => {
  const value = process.env[name];
  if (!value) {
    throw new Error(`Expected env var ${name}`);
  }
  return value;
};

// Supabase credentials are validated once, at module load, in this order.
const privateKey = requireEnv("SUPABASE_PRIVATE_KEY");
const url = requireEnv("SUPABASE_URL");

/**
 * Example: index three identical texts tagged with different `user_id`
 * metadata values, then run a similarity search restricted by a metadata
 * filter and log whatever comes back.
 */
export const run = async () => {
  const supabaseClient = createClient(url, privateKey);

  // Same text three times; only the metadata distinguishes the documents.
  const texts = ["Hello world", "Hello world", "Hello world"];
  const metadatas = [{ user_id: 2 }, { user_id: 1 }, { user_id: 3 }];

  const store = await SupabaseVectorStore.fromTexts(
    texts,
    metadatas,
    new OpenAIEmbeddings(),
    {
      client: supabaseClient,
      tableName: "documents",
      queryName: "match_documents",
    }
  );

  // The third argument is the metadata filter forwarded to the search.
  const metadataFilter = { user_id: 3 };
  const matches = await store.similaritySearch("Hello world", 1, metadataFilter);

  console.log(matches);
};
18 changes: 17 additions & 1 deletion langchain/src/vectorstores/supabase.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,12 @@ import { Document } from "../document.js";
/**
 * Named arguments passed to the Supabase RPC search function
 * (`match_documents` unless a different `queryName` is configured).
 */
interface SearchEmbeddingsParams {
// Embedding vector of the query to compare stored documents against.
query_embedding: number[];
// Maximum number of rows to return; the SQL function declares this as `int`.
match_count: number; // int
// Optional metadata filter; the SQL function declares this as `jsonb`.
filter?: SupabaseMetadata;
}

/**
 * Arbitrary key/value metadata object used as the filter type for
 * Supabase similarity searches. Values are intentionally untyped.
 */
// eslint-disable-next-line @typescript-eslint/ban-types, @typescript-eslint/no-explicit-any
type SupabaseMetadata = Record<string, any>;

interface SearchEmbeddingsResponse {
id: number;
content: string;
Expand All @@ -19,21 +23,27 @@ export interface SupabaseLibArgs {
client: SupabaseClient;
tableName?: string;
queryName?: string;
filter?: SupabaseMetadata;
}

export class SupabaseVectorStore extends VectorStore {
declare FilterType: SupabaseMetadata;

client: SupabaseClient;

tableName: string;

queryName: string;

filter?: SupabaseMetadata;

/**
 * Creates a vector store backed by a Supabase client.
 *
 * @param embeddings - Embeddings implementation forwarded to the base class.
 * @param args - Supabase client plus optional table name, query (RPC
 *   function) name, and a store-level metadata filter.
 */
constructor(embeddings: Embeddings, args: SupabaseLibArgs) {
  super(embeddings, args);

  const { client, tableName, queryName, filter } = args;

  this.client = client;
  // `||` (not `??`) so an empty string also falls back to the default name.
  this.tableName = tableName || "documents";
  this.queryName = queryName || "match_documents";
  this.filter = filter;
}

async addDocuments(documents: Document[]): Promise<void> {
Expand Down Expand Up @@ -68,9 +78,15 @@ export class SupabaseVectorStore extends VectorStore {

async similaritySearchVectorWithScore(
query: number[],
k: number
k: number,
filter?: this["FilterType"]
): Promise<[Document, number][]> {
if (filter && this.filter) {
throw new Error("cannot provide both `filter` and `this.filter`");
}
const _filter = filter ?? this.filter;
const matchDocumentsParams: SearchEmbeddingsParams = {
filter: _filter,
query_embedding: query,
match_count: k,
};
Expand Down
40 changes: 39 additions & 1 deletion langchain/src/vectorstores/tests/supabase.int.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ import { OpenAIEmbeddings } from "../../embeddings/openai.js";
import { Document } from "../../document.js";
import { SupabaseVectorStore } from "../supabase.js";

test("SupabaseVectorStore with external ids", async () => {
test.skip("SupabaseVectorStore with external ids", async () => {
const client = createClient(
process.env.SUPABASE_URL!,
process.env.SUPABASE_PRIVATE_KEY!
Expand All @@ -34,3 +34,41 @@ test("SupabaseVectorStore with external ids", async () => {
new Document({ metadata: { a: 1 }, pageContent: "hello" }),
]);
});

// Skipped by default: requires a live Supabase project (SUPABASE_URL and
// SUPABASE_PRIVATE_KEY) with the `documents` table and search function set up.
test.skip("Search a SupabaseVectorStore using a metadata filter", async () => {
  const supabase = createClient(
    process.env.SUPABASE_URL!,
    process.env.SUPABASE_PRIVATE_KEY!
  );

  const store = new SupabaseVectorStore(new OpenAIEmbeddings(), {
    client: supabase,
    tableName: "documents",
  });

  expect(store).toBeDefined();

  // A per-run timestamp keeps the metadata values unique to this test run.
  const baseTimestamp = Date.now();

  // Four documents, "hello 0" .. "hello 3", each tagged with its own created_at.
  const docs = [0, 1, 2, 3].map((offset) => ({
    pageContent: `hello ${offset}`,
    metadata: { created_at: baseTimestamp + offset },
  }));
  await store.addDocuments(docs);

  // Filter on the created_at value that belongs to "hello 2" only.
  const targetCreatedAt = baseTimestamp + 2;
  const results = await store.similaritySearch("hello", 1, {
    created_at: targetCreatedAt,
  });

  expect(results).toHaveLength(1);

  const expectedDocument = new Document({
    metadata: { created_at: targetCreatedAt },
    pageContent: "hello 2",
  });
  expect(results).toEqual([expectedDocument]);
});
46 changes: 46 additions & 0 deletions langchain/src/vectorstores/tests/supabase.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
import { test, expect, jest } from "@jest/globals";
import { SupabaseClient } from "@supabase/supabase-js";

import { SupabaseVectorStore } from "../supabase.js";

import { FakeEmbeddings } from "../../embeddings/fake.js";

// A filter configured on the store itself must be forwarded to the RPC call
// when no per-call filter is supplied.
test("similaritySearchVectorWithScore should call RPC with the vectorstore filters", async () => {
  // Stubbed `rpc` that resolves to an empty result set.
  const rpc = jest.fn().mockReturnValue(Promise.resolve({ data: [] }));
  const fakeClient = { rpc } as Partial<SupabaseClient>;

  const store = new SupabaseVectorStore(new FakeEmbeddings(), {
    client: fakeClient as SupabaseClient,
    tableName: "documents",
    queryName: "match_documents",
    filter: { a: 2 },
  });

  await store.similaritySearchVectorWithScore([1, 2, 3], 5);

  const expectedParams = {
    filter: { a: 2 },
    query_embedding: [1, 2, 3],
    match_count: 5,
  };
  expect(rpc).toHaveBeenCalledWith("match_documents", expectedParams);
});

// A filter passed per call must be forwarded to the RPC call when the store
// has no filter of its own.
test("similaritySearchVectorWithScore should call RPC with the passed filters", async () => {
  // Stubbed `rpc` that resolves to an empty result set.
  const rpc = jest.fn().mockReturnValue(Promise.resolve({ data: [] }));
  const fakeClient = { rpc } as Partial<SupabaseClient>;

  const store = new SupabaseVectorStore(new FakeEmbeddings(), {
    client: fakeClient as SupabaseClient,
    tableName: "documents",
    queryName: "match_documents",
  });

  const perCallFilter = { b: 3 };
  await store.similaritySearchVectorWithScore([1, 2, 3], 5, perCallFilter);

  const expectedParams = {
    filter: { b: 3 },
    query_embedding: [1, 2, 3],
    match_count: 5,
  };
  expect(rpc).toHaveBeenCalledWith("match_documents", expectedParams);
});

0 comments on commit f6f06e0

Please sign in to comment.