Skip to content

Commit

Permalink
Fix: Chroma and Pinecone integration tests & standalone translator cl…
Browse files Browse the repository at this point in the history
…asses (langchain-ai#1542)

* Added Chroma and Pinecone integration tests; also created standalone Pinecone and Chroma translators

* replaced neq with ne

* Removed `in` and `nin` as comparators

* Separate into entrypoints

---------

Co-authored-by: jacoblee93 <[email protected]>
  • Loading branch information
ppramesi and jacoblee93 authored Jun 7, 2023
1 parent c4f4b49 commit 9af90c9
Show file tree
Hide file tree
Showing 10 changed files with 296 additions and 10 deletions.
8 changes: 3 additions & 5 deletions examples/src/retrievers/chroma_self_query.ts
Original file line number Diff line number Diff line change
@@ -1,10 +1,8 @@
import { AttributeInfo } from "langchain/schema/query_constructor";
import { Document } from "langchain/document";
import { OpenAIEmbeddings } from "langchain/embeddings/openai";
import {
SelfQueryRetriever,
BasicTranslator,
} from "langchain/retrievers/self_query";
import { SelfQueryRetriever } from "langchain/retrievers/self_query";
import { ChromaTranslator } from "langchain/retrievers/self_query/chroma";
import { OpenAI } from "langchain/llms/openai";
import { Chroma } from "langchain/vectorstores/chroma";

Expand Down Expand Up @@ -104,7 +102,7 @@ const selfQueryRetriever = await SelfQueryRetriever.fromLLM({
* abstract class. Note that the vector store needs to support filtering on the metadata
* attributes you want to query on.
*/
structuredQueryTranslator: new BasicTranslator(),
structuredQueryTranslator: new ChromaTranslator(),
});

/**
Expand Down
9 changes: 4 additions & 5 deletions examples/src/retrievers/pinecone_self_query.ts
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
import { PineconeClient } from "@pinecone-database/pinecone";

import { AttributeInfo } from "langchain/schema/query_constructor";
import { Document } from "langchain/document";
import { OpenAIEmbeddings } from "langchain/embeddings/openai";
import {
SelfQueryRetriever,
BasicTranslator,
} from "langchain/retrievers/self_query";
import { SelfQueryRetriever } from "langchain/retrievers/self_query";
import { PineconeTranslator } from "langchain/retrievers/self_query/pinecone";
import { PineconeStore } from "langchain/vectorstores/pinecone";
import { OpenAI } from "langchain/llms/openai";

Expand Down Expand Up @@ -122,7 +121,7 @@ const selfQueryRetriever = await SelfQueryRetriever.fromLLM({
* abstract class. Note that the vector store needs to support filtering on the metadata
* attributes you want to query on.
*/
structuredQueryTranslator: new BasicTranslator(),
structuredQueryTranslator: new PineconeTranslator(),
});

/**
Expand Down
6 changes: 6 additions & 0 deletions langchain/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -316,9 +316,15 @@ retrievers/hyde.d.ts
retrievers/self_query.cjs
retrievers/self_query.js
retrievers/self_query.d.ts
retrievers/self_query/chroma.cjs
retrievers/self_query/chroma.js
retrievers/self_query/chroma.d.ts
retrievers/self_query/functional.cjs
retrievers/self_query/functional.js
retrievers/self_query/functional.d.ts
retrievers/self_query/pinecone.cjs
retrievers/self_query/pinecone.js
retrievers/self_query/pinecone.d.ts
retrievers/self_query/supabase.cjs
retrievers/self_query/supabase.js
retrievers/self_query/supabase.d.ts
Expand Down
16 changes: 16 additions & 0 deletions langchain/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -328,9 +328,15 @@
"retrievers/self_query.cjs",
"retrievers/self_query.js",
"retrievers/self_query.d.ts",
"retrievers/self_query/chroma.cjs",
"retrievers/self_query/chroma.js",
"retrievers/self_query/chroma.d.ts",
"retrievers/self_query/functional.cjs",
"retrievers/self_query/functional.js",
"retrievers/self_query/functional.d.ts",
"retrievers/self_query/pinecone.cjs",
"retrievers/self_query/pinecone.js",
"retrievers/self_query/pinecone.d.ts",
"retrievers/self_query/supabase.cjs",
"retrievers/self_query/supabase.js",
"retrievers/self_query/supabase.d.ts",
Expand Down Expand Up @@ -1255,11 +1261,21 @@
"import": "./retrievers/self_query.js",
"require": "./retrievers/self_query.cjs"
},
"./retrievers/self_query/chroma": {
"types": "./retrievers/self_query/chroma.d.ts",
"import": "./retrievers/self_query/chroma.js",
"require": "./retrievers/self_query/chroma.cjs"
},
"./retrievers/self_query/functional": {
"types": "./retrievers/self_query/functional.d.ts",
"import": "./retrievers/self_query/functional.js",
"require": "./retrievers/self_query/functional.cjs"
},
"./retrievers/self_query/pinecone": {
"types": "./retrievers/self_query/pinecone.d.ts",
"import": "./retrievers/self_query/pinecone.js",
"require": "./retrievers/self_query/pinecone.cjs"
},
"./retrievers/self_query/supabase": {
"types": "./retrievers/self_query/supabase.d.ts",
"import": "./retrievers/self_query/supabase.js",
Expand Down
4 changes: 4 additions & 0 deletions langchain/scripts/create-entrypoints.js
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,9 @@ const entrypoints = {
"retrievers/document_compressors/chain_extract",
"retrievers/hyde": "retrievers/hyde",
"retrievers/self_query": "retrievers/self_query/index",
"retrievers/self_query/chroma": "retrievers/self_query/chroma",
"retrievers/self_query/functional": "retrievers/self_query/functional",
"retrievers/self_query/pinecone": "retrievers/self_query/pinecone",
"retrievers/self_query/supabase": "retrievers/self_query/supabase",
"retrievers/vespa": "retrievers/vespa",
// cache
Expand Down Expand Up @@ -234,7 +236,9 @@ const requiresOptionalDependency = [
"retrievers/zep",
"retrievers/metal",
"retrievers/self_query",
"retrievers/self_query/chroma",
"retrievers/self_query/functional",
"retrievers/self_query/pinecone",
"retrievers/self_query/supabase",
"output_parsers/expression",
"chains/query_constructor",
Expand Down
18 changes: 18 additions & 0 deletions langchain/src/retrievers/self_query/chroma.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
import { Comparators, Operators } from "../../chains/query_constructor/ir.js";
import { BasicTranslator } from "./base.js";

/**
 * A `BasicTranslator` constructed with a fixed allow-list of operators and
 * comparators. Going by its name and by the example that pairs it with the
 * Chroma vector store, it is meant to be passed as the
 * `structuredQueryTranslator` of a self-query retriever backed by Chroma.
 *
 * Allowed logical operators: `and`, `or`.
 * Allowed comparators: `eq`, `ne`, `gt`, `gte`, `lt`, `lte`.
 */
export class ChromaTranslator extends BasicTranslator {
  constructor() {
    // Build the two allow-lists first so the super call reads as a plain
    // hand-off of configuration. Neither list touches `this`.
    const logicalOperators = [Operators.and, Operators.or];
    const comparisonOperators = [
      Comparators.eq,
      Comparators.ne,
      Comparators.gt,
      Comparators.gte,
      Comparators.lt,
      Comparators.lte,
    ];

    super({
      allowedOperators: logicalOperators,
      allowedComparators: comparisonOperators,
    });
  }
}
18 changes: 18 additions & 0 deletions langchain/src/retrievers/self_query/pinecone.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
import { Comparators, Operators } from "../../chains/query_constructor/ir.js";
import { BasicTranslator } from "./base.js";

/**
 * A `BasicTranslator` constructed with a fixed allow-list of operators and
 * comparators. Going by its name and by the example that pairs it with
 * `PineconeStore`, it is meant to be passed as the
 * `structuredQueryTranslator` of a self-query retriever backed by Pinecone.
 *
 * Allowed logical operators: `and`, `or`.
 * Allowed comparators: `eq`, `ne`, `gt`, `gte`, `lt`, `lte`.
 */
export class PineconeTranslator extends BasicTranslator {
  constructor() {
    // Assemble the options object up front; it does not reference `this`,
    // so it may legally precede the super call.
    const translatorOptions = {
      allowedOperators: [Operators.and, Operators.or],
      allowedComparators: [
        Comparators.eq,
        Comparators.ne,
        Comparators.gt,
        Comparators.gte,
        Comparators.lt,
        Comparators.lte,
      ],
    };

    super(translatorOptions);
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
import { test } from "@jest/globals";
import { Document } from "../../../document.js";
import { AttributeInfo } from "../../../schema/query_constructor.js";
import { OpenAIEmbeddings } from "../../../embeddings/openai.js";
import { SelfQueryRetriever } from "../index.js";
import { ChromaTranslator } from "../chroma.js";
import { OpenAI } from "../../../llms/openai.js";
import { Chroma } from "../../../vectorstores/chroma.js";

/**
 * Integration smoke test for the self-query retriever on top of Chroma.
 *
 * Indexes six short movie summaries (with metadata) into a Chroma collection,
 * builds a `SelfQueryRetriever` that uses `ChromaTranslator`, runs four
 * natural-language questions one after another, and logs the four results.
 * The test contains no assertions: it fails only if one of the calls throws.
 */
test("Chroma Store Self Query Retriever Test", async () => {
  // Sample corpus. The metadata keys deliberately vary between documents
  // (some have no `director`, some no `genre`, one no `rating`).
  const movies = [
    new Document({
      pageContent:
        "A bunch of scientists bring back dinosaurs and mayhem breaks loose",
      metadata: { year: 1993, rating: 7.7, genre: "science fiction" },
    }),
    new Document({
      pageContent:
        "Leo DiCaprio gets lost in a dream within a dream within a dream within a ...",
      metadata: { year: 2010, director: "Christopher Nolan", rating: 8.2 },
    }),
    new Document({
      pageContent:
        "A psychologist / detective gets lost in a series of dreams within dreams within dreams and Inception reused the idea",
      metadata: { year: 2006, director: "Satoshi Kon", rating: 8.6 },
    }),
    new Document({
      pageContent:
        "A bunch of normal-sized women are supremely wholesome and some men pine after them",
      metadata: { year: 2019, director: "Greta Gerwig", rating: 8.3 },
    }),
    new Document({
      pageContent: "Toys come alive and have a blast doing so",
      metadata: { year: 1995, genre: "animated" },
    }),
    new Document({
      pageContent:
        "Three men walk into the Zone, three men walk out of the Zone",
      metadata: {
        year: 1979,
        director: "Andrei Tarkovsky",
        genre: "science fiction",
        rating: 9.9,
      },
    }),
  ];

  // Metadata attributes described to the retriever. Note that `length` is
  // declared here although none of the sample documents carries it.
  const movieAttributes: AttributeInfo[] = [
    {
      name: "genre",
      description: "The genre of the movie",
      type: "string or array of strings",
    },
    {
      name: "year",
      description: "The year the movie was released",
      type: "number",
    },
    {
      name: "director",
      description: "The director of the movie",
      type: "string",
    },
    {
      name: "rating",
      description: "The rating of the movie (1-10)",
      type: "number",
    },
    {
      name: "length",
      description: "The length of the movie in minutes",
      type: "number",
    },
  ];

  const embedder = new OpenAIEmbeddings();
  const model = new OpenAI();
  const corpusDescription = "Brief summary of a movie";

  const chromaStore = await Chroma.fromDocuments(movies, embedder, {
    collectionName: "a-movie-collection",
  });

  const retriever = await SelfQueryRetriever.fromLLM({
    llm: model,
    vectorStore: chromaStore,
    documentContents: corpusDescription,
    attributeInfo: movieAttributes,
    structuredQueryTranslator: new ChromaTranslator(),
  });

  // The questions are issued sequentially, one request at a time.
  const shorterThan90 = await retriever.getRelevantDocuments(
    "Which movies are less than 90 minutes?"
  );
  const ratedAbove85 = await retriever.getRelevantDocuments(
    "Which movies are rated higher than 8.5?"
  );
  const byGretaGerwig = await retriever.getRelevantDocuments(
    "Which movies are directed by Greta Gerwig?"
  );
  const shortComedyOrDrama = await retriever.getRelevantDocuments(
    "Which movies are either comedy or drama and are less than 90 minutes?"
  );

  console.log(shorterThan90, ratedAbove85, byGretaGerwig, shortComedyOrDrama);
});
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
/* eslint-disable no-process-env */
import { test } from "@jest/globals";
import { PineconeClient } from "@pinecone-database/pinecone";
import { Document } from "../../../document.js";
import { AttributeInfo } from "../../../schema/query_constructor.js";
import { OpenAIEmbeddings } from "../../../embeddings/openai.js";
import { SelfQueryRetriever } from "../index.js";
import { PineconeTranslator } from "../pinecone.js";
import { OpenAI } from "../../../llms/openai.js";
import { PineconeStore } from "../../../vectorstores/pinecone.js";

/**
 * Integration smoke test for the self-query retriever on top of Pinecone.
 *
 * Requires the PINECONE_API_KEY, PINECONE_ENVIRONMENT and PINECONE_INDEX
 * environment variables; the test throws if any of them is missing or empty.
 * It indexes six short movie summaries (with metadata) into the configured
 * Pinecone index, builds a `SelfQueryRetriever` that uses
 * `PineconeTranslator`, runs four natural-language questions one after
 * another, and logs the four results. The test contains no assertions: it
 * fails only if one of the calls throws.
 */
test("Pinecone Store Self Query Retriever Test", async () => {
  // Sample corpus. The metadata keys deliberately vary between documents
  // (some have no `director`, some no `genre`, one no `rating`).
  const movies = [
    new Document({
      pageContent:
        "A bunch of scientists bring back dinosaurs and mayhem breaks loose",
      metadata: { year: 1993, rating: 7.7, genre: "science fiction" },
    }),
    new Document({
      pageContent:
        "Leo DiCaprio gets lost in a dream within a dream within a dream within a ...",
      metadata: { year: 2010, director: "Christopher Nolan", rating: 8.2 },
    }),
    new Document({
      pageContent:
        "A psychologist / detective gets lost in a series of dreams within dreams within dreams and Inception reused the idea",
      metadata: { year: 2006, director: "Satoshi Kon", rating: 8.6 },
    }),
    new Document({
      pageContent:
        "A bunch of normal-sized women are supremely wholesome and some men pine after them",
      metadata: { year: 2019, director: "Greta Gerwig", rating: 8.3 },
    }),
    new Document({
      pageContent: "Toys come alive and have a blast doing so",
      metadata: { year: 1995, genre: "animated" },
    }),
    new Document({
      pageContent:
        "Three men walk into the Zone, three men walk out of the Zone",
      metadata: {
        year: 1979,
        director: "Andrei Tarkovsky",
        genre: "science fiction",
        rating: 9.9,
      },
    }),
  ];

  // Metadata attributes described to the retriever. Note that `length` is
  // declared here although none of the sample documents carries it.
  const movieAttributes: AttributeInfo[] = [
    {
      name: "genre",
      description: "The genre of the movie",
      type: "string or array of strings",
    },
    {
      name: "year",
      description: "The year the movie was released",
      type: "number",
    },
    {
      name: "director",
      description: "The director of the movie",
      type: "string",
    },
    {
      name: "rating",
      description: "The rating of the movie (1-10)",
      type: "number",
    },
    {
      name: "length",
      description: "The length of the movie in minutes",
      type: "number",
    },
  ];

  // Read the Pinecone settings once; the truthiness check below also narrows
  // each value from `string | undefined` to `string` for the calls that follow.
  const {
    PINECONE_API_KEY: pineconeApiKey,
    PINECONE_ENVIRONMENT: pineconeEnvironment,
    PINECONE_INDEX: pineconeIndexName,
  } = process.env;
  if (!pineconeApiKey || !pineconeEnvironment || !pineconeIndexName) {
    throw new Error(
      "PINECONE_ENVIRONMENT and PINECONE_API_KEY and PINECONE_INDEX must be set"
    );
  }

  const pinecone = new PineconeClient();
  await pinecone.init({
    apiKey: pineconeApiKey,
    environment: pineconeEnvironment,
  });
  const pineconeIndex = pinecone.Index(pineconeIndexName);

  const embedder = new OpenAIEmbeddings();
  const model = new OpenAI();
  const corpusDescription = "Brief summary of a movie";

  const pineconeStore = await PineconeStore.fromDocuments(movies, embedder, {
    pineconeIndex,
  });

  const retriever = await SelfQueryRetriever.fromLLM({
    llm: model,
    vectorStore: pineconeStore,
    documentContents: corpusDescription,
    attributeInfo: movieAttributes,
    structuredQueryTranslator: new PineconeTranslator(),
  });

  // The questions are issued sequentially, one request at a time.
  const shorterThan90 = await retriever.getRelevantDocuments(
    "Which movies are less than 90 minutes?"
  );
  const ratedAbove85 = await retriever.getRelevantDocuments(
    "Which movies are rated higher than 8.5?"
  );
  const byGretaGerwig = await retriever.getRelevantDocuments(
    "Which movies are directed by Greta Gerwig?"
  );
  const shortComedyOrDrama = await retriever.getRelevantDocuments(
    "Which movies are either comedy or drama and are less than 90 minutes?"
  );

  console.log(shorterThan90, ratedAbove85, byGretaGerwig, shortComedyOrDrama);
});
2 changes: 2 additions & 0 deletions langchain/tsconfig.json
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,9 @@
"src/retrievers/document_compressors/chain_extract.ts",
"src/retrievers/hyde.ts",
"src/retrievers/self_query/index.ts",
"src/retrievers/self_query/chroma.ts",
"src/retrievers/self_query/functional.ts",
"src/retrievers/self_query/pinecone.ts",
"src/retrievers/self_query/supabase.ts",
"src/retrievers/vespa.ts",
"src/cache/index.ts",
Expand Down

0 comments on commit 9af90c9

Please sign in to comment.