Skip to content

Commit

Permalink
Allow ids passed into SupabaseVectorStore for upsertion, adds tests, …
Browse files Browse the repository at this point in the history
…updates docs (langchain-ai#1915)

* Allow ids passed into SupabaseVectorStore for upsertion, adds tests, updates docs

* Change bad variable name
  • Loading branch information
jacoblee93 authored Jul 10, 2023
1 parent 53b6cb4 commit 542bf80
Show file tree
Hide file tree
Showing 12 changed files with 175 additions and 23 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,7 @@ const docs = [
}),
];

// Also takes an additional {ids: []} parameter for upsertion
const ids = await pineconeStore.addDocuments(docs);

const results = await pineconeStore.similaritySearch(pageContent, 2, {
Expand Down
1 change: 1 addition & 0 deletions examples/src/indexes/vector_stores/chroma/delete.ts
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ const documents = [
},
];

// Also supports an additional {ids: []} parameter for upsertion
const ids = await vectorStore.addDocuments(documents);

const response = await vectorStore.similaritySearch("scared", 2);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ export async function run() {
// await ElasticVectorSearch.fromDocuments(docs, embeddings, clientArgs);
const vectorStore = new ElasticVectorSearch(embeddings, clientArgs);

// Also supports an additional {ids: []} parameter for upsertion
const ids = await vectorStore.addDocuments(docs);

/* Search the vector DB independently with meta filters */
Expand Down
1 change: 1 addition & 0 deletions examples/src/indexes/vector_stores/supabase_deletion.ts
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ export const run = async () => {
{ pageContent: "hello", metadata: { b: 1, c: 9, stuff: "wrong" } },
];

// Also takes an additional {ids: []} parameter for upsertion
const ids = await store.addDocuments(docs);

const resultA = await store.similaritySearch("hello", 2);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ export const run = async () => {
{ pageContent: "what's this", metadata: { b: 4, c: 6, stuff: "right" } },
];

// Also supports an additional {ids: []} parameter for upsertion
await store.addDocuments(docs);

const funcFilterA: SupabaseFilterRPCCall = (rpc) =>
Expand Down
1 change: 1 addition & 0 deletions examples/src/indexes/vector_stores/weaviate_delete.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ export async function run() {

const docs = [{ pageContent: "see ya!", metadata: { foo: "bar" } }];

// Also supports an additional {ids: []} parameter for upsertion
const ids = await store.addDocuments(docs);

// Search the index without any filters
Expand Down
24 changes: 17 additions & 7 deletions langchain/src/vectorstores/supabase.ts
Original file line number Diff line number Diff line change
Expand Up @@ -50,15 +50,20 @@ export class SupabaseVectorStore extends VectorStore {
this.filter = args.filter;
}

async addDocuments(documents: Document[]) {
async addDocuments(documents: Document[], options?: { ids?: string[] }) {
const texts = documents.map(({ pageContent }) => pageContent);
return this.addVectors(
await this.embeddings.embedDocuments(texts),
documents
documents,
options
);
}

async addVectors(vectors: number[][], documents: Document[]) {
async addVectors(
vectors: number[][],
documents: Document[],
options?: { ids?: string[] }
) {
const rows = vectors.map((embedding, idx) => ({
content: documents[idx].pageContent,
embedding,
Expand All @@ -68,9 +73,14 @@ export class SupabaseVectorStore extends VectorStore {
// upsert returns 500/502/504 (yes really any of them) if given too many rows/characters
// ~2000 trips it, but my data is probably smaller than average pageContent and metadata
const chunkSize = 500;
let ids: string[] = [];
let returnedIds: string[] = [];
for (let i = 0; i < rows.length; i += chunkSize) {
const chunk = rows.slice(i, i + chunkSize);
// `i` is the offset of this chunk within `rows`, so the j-th row of the
// chunk corresponds to `options.ids[i + j]`. Indexing with `i` alone would
// assign the same id to every row in the chunk.
const chunk = rows.slice(i, i + chunkSize).map((row, j) => {
  if (options?.ids) {
    return { id: options.ids[i + j], ...row };
  }
  return row;
});

const res = await this.client.from(this.tableName).upsert(chunk).select();
if (res.error) {
Expand All @@ -79,10 +89,10 @@ export class SupabaseVectorStore extends VectorStore {
);
}
if (res.data) {
ids = ids.concat(res.data.map((row) => row.id));
returnedIds = returnedIds.concat(res.data.map((row) => row.id));
}
}
return ids;
return returnedIds;
}

async delete(params: { ids: string[] }): Promise<void> {
Expand Down
32 changes: 32 additions & 0 deletions langchain/src/vectorstores/tests/chroma.int.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,38 @@ describe("Chroma", () => {
]);
});

// Upsert check for the Chroma store: adding documents again with the ids
// returned by the first insert should return the same ids and leave the
// number of matching documents unchanged.
// Registered with test.skip, so it is not run by default.
test.skip("upsert", async () => {
const pageContent = faker.lorem.sentence(5);
// Unique metadata value used as the search filter for this run's documents.
const id = uuid.v4();

const ids = await chromaStore.addDocuments([
{ pageContent, metadata: { foo: id } },
{ pageContent, metadata: { foo: id } },
]);

// Ask for up to 4 results so that extra documents would be visible.
const results = await chromaStore.similaritySearch(pageContent, 4, {
foo: id,
});

expect(results.length).toEqual(2);

// Second insert reuses the ids returned by the first one.
const ids2 = await chromaStore.addDocuments(
[
{ pageContent, metadata: { foo: id } },
{ pageContent, metadata: { foo: id } },
],
{ ids }
);

expect(ids).toEqual(ids2);

const newResults = await chromaStore.similaritySearch(pageContent, 4, {
foo: id,
});

// Still two matches: the second insert did not add new documents.
expect(newResults.length).toEqual(2);
});

test.skip("delete by ids", async () => {
const pageContent = faker.lorem.sentence(5);
const id = uuid.v4();
Expand Down
42 changes: 32 additions & 10 deletions langchain/src/vectorstores/tests/elasticsearch.int.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -36,31 +36,53 @@ test("ElasticVectorSearch integration", async () => {

expect(store).toBeDefined();

const createdAt = new Date().getTime();

const ids = await store.addDocuments([
{ pageContent: "hello", metadata: { a: 2 } },
{ pageContent: "car", metadata: { a: 1 } },
{ pageContent: "adjective", metadata: { a: 1 } },
{ pageContent: "hi", metadata: { a: 1 } },
{ pageContent: "hello", metadata: { a: createdAt + 1 } },
{ pageContent: "car", metadata: { a: createdAt } },
{ pageContent: "adjective", metadata: { a: createdAt } },
{ pageContent: "hi", metadata: { a: createdAt } },
]);

const results1 = await store.similaritySearch("hello!", 1);

expect(results1).toHaveLength(1);
expect(results1).toEqual([
new Document({ metadata: { a: 2 }, pageContent: "hello" }),
new Document({ metadata: { a: createdAt + 1 }, pageContent: "hello" }),
]);

const results2 = await store.similaritySearchWithScore("testing!", 3, {
a: 1,
const results2 = await store.similaritySearchWithScore("testing!", 6, {
a: createdAt,
});

expect(results2).toHaveLength(3);

const ids2 = await store.addDocuments(
[
{ pageContent: "hello upserted", metadata: { a: createdAt + 1 } },
{ pageContent: "car upserted", metadata: { a: createdAt } },
{ pageContent: "adjective upserted", metadata: { a: createdAt } },
{ pageContent: "hi upserted", metadata: { a: createdAt } },
],
{ ids }
);

expect(ids).toEqual(ids2);

const results3 = await store.similaritySearchWithScore("testing!", 6, {
a: createdAt,
});

expect(results3).toHaveLength(3);

console.log(`Upserted:`, results3);

await store.delete({ ids: ids.slice(2) });

const results3 = await store.similaritySearchWithScore("hello!", 1, {
a: 1,
const results4 = await store.similaritySearchWithScore("testing!", 3, {
a: createdAt,
});

expect(results3).toHaveLength(1);
expect(results4).toHaveLength(1);
});
13 changes: 12 additions & 1 deletion langchain/src/vectorstores/tests/pinecone.int.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,20 @@ describe("PineconeStore", () => {
[documentId]
);

const results = await pineconeStore.similaritySearch(pageContent, 1);
const results = await pineconeStore.similaritySearch(pageContent, 2);

expect(results).toEqual([new Document({ metadata: {}, pageContent })]);

await pineconeStore.addDocuments(
[{ pageContent: `${pageContent} upserted`, metadata: {} }],
[documentId]
);

const results2 = await pineconeStore.similaritySearch(pageContent, 2);

expect(results2).toEqual([
new Document({ metadata: {}, pageContent: `${pageContent} upserted` }),
]);
});

test("auto-generated ids", async () => {
Expand Down
37 changes: 37 additions & 0 deletions langchain/src/vectorstores/tests/supabase.int.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,43 @@ test("Search a SupabaseVectorStore with a functional metadata filter", async ()
]);
});

// Upsert check for SupabaseVectorStore: adding a document with the id
// returned by an earlier insert should return the same id and replace the
// stored content.
// NOTE(review): only one document is upserted here, so this does not
// exercise the mapping of several ids onto several rows.
test("Upsert on a SupabaseVectorStore", async () => {
const client = createClient(
process.env.SUPABASE_VECTOR_STORE_URL!,
process.env.SUPABASE_VECTOR_STORE_PRIVATE_KEY!
);

const embeddings = new OpenAIEmbeddings();

const store = new SupabaseVectorStore(embeddings, {
client,
tableName: "documents",
});

expect(store).toBeDefined();

// Timestamp used as a metadata value to filter searches to this run.
const createdAt = new Date().getTime();

const ids = await store.addDocuments([
{ pageContent: "hello 0", metadata: { created_at: createdAt } },
]);

const results = await store.similaritySearch("hello", 2, {
created_at: createdAt,
});
expect(results).toHaveLength(1);
// Second insert reuses the id returned by the first one.
const ids2 = await store.addDocuments(
[{ pageContent: "hello 1", metadata: { created_at: createdAt } }],
{ ids }
);
expect(ids).toEqual(ids2);
const results2 = await store.similaritySearch("hello", 2, {
created_at: createdAt,
});
// Still a single match, now carrying the updated content.
expect(results2).toHaveLength(1);
expect(results2[0].pageContent).toEqual("hello 1");
});

test("Delete on a SupabaseVectorStore", async () => {
const client = createClient(
process.env.SUPABASE_VECTOR_STORE_URL!,
Expand Down
44 changes: 39 additions & 5 deletions langchain/src/vectorstores/tests/weaviate.int.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ test.skip("WeaviateStore", async () => {
]);
});

test.skip("WeaviateStore delete", async () => {
test.skip("WeaviateStore upsert + delete", async () => {
// Something wrong with the weaviate-ts-client types, so we need to disable
// eslint-disable-next-line @typescript-eslint/no-explicit-any
const client = (weaviate as any).client({
Expand Down Expand Up @@ -131,7 +131,7 @@ test.skip("WeaviateStore delete", async () => {
},
]);

const results = await store.similaritySearch("hello world", 2, {
const results = await store.similaritySearch("hello world", 4, {
where: {
operator: "Equal",
path: ["deletionTest"],
Expand All @@ -149,9 +149,23 @@ test.skip("WeaviateStore delete", async () => {
}),
]);

await store.delete({ ids: ids.slice(0, 1) });
const ids2 = await store.addDocuments(
[
{
pageContent: "hello world upserted",
metadata: { deletionTest: (createdAt + 1).toString() },
},
{
pageContent: "hello world upserted",
metadata: { deletionTest: (createdAt + 1).toString() },
},
],
{ ids }
);

const results2 = await store.similaritySearch("hello world", 1, {
expect(ids2).toEqual(ids);

const results2 = await store.similaritySearch("hello world", 4, {
where: {
operator: "Equal",
path: ["deletionTest"],
Expand All @@ -160,7 +174,27 @@ test.skip("WeaviateStore delete", async () => {
});
expect(results2).toEqual([
new Document({
pageContent: "hello world",
pageContent: "hello world upserted",
metadata: { deletionTest: (createdAt + 1).toString() },
}),
new Document({
pageContent: "hello world upserted",
metadata: { deletionTest: (createdAt + 1).toString() },
}),
]);

await store.delete({ ids: ids.slice(0, 1) });

const results3 = await store.similaritySearch("hello world", 1, {
where: {
operator: "Equal",
path: ["deletionTest"],
valueText: (createdAt + 1).toString(),
},
});
expect(results3).toEqual([
new Document({
pageContent: "hello world upserted",
metadata: { deletionTest: (createdAt + 1).toString() },
}),
]);
Expand Down

0 comments on commit 542bf80

Please sign in to comment.