Adds metadata filtering for SemanticSimilarityExampleSelector, updates docs (langchain-ai#2834)

* Adds metadata filtering for SemanticSimilarityExampleSelector, updates docs
* Update docs
jdubois · Oct 8, 2023 · fa98f73 · fa98f73
1 parent a672831
commit fa98f73
Show file tree

Hide file tree

Showing 7 changed files with 298 additions and 62 deletions.
diff --git a/docs/snippets/modules/model_io/prompts/example_selectors/similarity.mdx b/docs/snippets/modules/model_io/prompts/example_selectors/similarity.mdx
@@ -1,4 +1,32 @@
 import CodeBlock from "@theme/CodeBlock";
 import ExampleSimilarity from "@examples/prompts/semantic_similarity_example_selector.ts";
 
+The fields of the examples object will be used as parameters to format the `examplePrompt` passed to the `FewShotPromptTemplate`. 
+Each example should therefore contain all required fields for the example prompt you are using.
+
 <CodeBlock language="typescript">{ExampleSimilarity}</CodeBlock>
+
+By default, the values of all fields in each example are concatenated together, embedded, and stored in the vector store for
+later similarity search against user queries.
+
+If you only want to embed specific keys
+(e.g., you only want to search for examples that have a similar query to the one the user provides), you can pass an `inputKeys` 
+array in the final `options` parameter.
+
+## Loading from an existing vectorstore
+
+You can also use a pre-initialized vector store by passing an instance directly to the `SemanticSimilarityExampleSelector` constructor,
+as shown below. More examples can then be added via the `addExample` method:
+
+import ExampleSimilarityFromExisting from "@examples/prompts/semantic_similarity_example_selector_from_existing.ts";
+
+<CodeBlock language="typescript">{ExampleSimilarityFromExisting}</CodeBlock>
+
+## Metadata filtering
+
+When adding examples, each field is available as metadata in the produced document. If you would like further control over your
+search space, you can add extra fields to your examples and pass a `filter` parameter when initializing your selector:
+
+import ExampleSimilarityMetadataFiltering from "@examples/prompts/semantic_similarity_example_selector_metadata_filtering.ts";
+
+<CodeBlock language="typescript">{ExampleSimilarityMetadataFiltering}</CodeBlock>
diff --git a/examples/src/embeddings/bedrock.ts b/examples/src/embeddings/bedrock.ts
@@ -1,3 +1,4 @@
+/* eslint-disable @typescript-eslint/no-non-null-assertion */
 import { BedrockEmbeddings } from "langchain/embeddings/bedrock";
 
 const embeddings = new BedrockEmbeddings({

diff --git a/examples/src/prompts/semantic_similarity_example_selector.ts b/examples/src/prompts/semantic_similarity_example_selector.ts
@@ -6,58 +6,55 @@ import {
 } from "langchain/prompts";
 import { HNSWLib } from "langchain/vectorstores/hnswlib";
 
-export async function run() {
-  // Create a prompt template that will be used to format the examples.
-  const examplePrompt = new PromptTemplate({
-    inputVariables: ["input", "output"],
-    template: "Input: {input}\nOutput: {output}",
-  });
-
-  // Create a SemanticSimilarityExampleSelector that will be used to select the examples.
-  const exampleSelector = await SemanticSimilarityExampleSelector.fromExamples(
-    [
-      { input: "happy", output: "sad" },
-      { input: "tall", output: "short" },
-      { input: "energetic", output: "lethargic" },
-      { input: "sunny", output: "gloomy" },
-      { input: "windy", output: "calm" },
-    ],
-    new OpenAIEmbeddings(),
-    HNSWLib,
-    { k: 1 }
-  );
-
-  // Create a FewShotPromptTemplate that will use the example selector.
-  const dynamicPrompt = new FewShotPromptTemplate({
-    // We provide an ExampleSelector instead of examples.
-    exampleSelector,
-    examplePrompt,
-    prefix: "Give the antonym of every input",
-    suffix: "Input: {adjective}\nOutput:",
-    inputVariables: ["adjective"],
-  });
-
-  // Input is about the weather, so should select eg. the sunny/gloomy example
-  console.log(await dynamicPrompt.format({ adjective: "rainy" }));
-  /*
-   Give the antonym of every input
-
-   Input: sunny
-   Output: gloomy
-
-   Input: rainy
-   Output:
-   */
-
-  // Input is a measurement, so should select the tall/short example
-  console.log(await dynamicPrompt.format({ adjective: "large" }));
-  /*
-   Give the antonym of every input
-
-   Input: tall
-   Output: short
-
-   Input: large
-   Output:
-   */
-}
+// Create a prompt template that will be used to format the examples.
+const examplePrompt = PromptTemplate.fromTemplate(
+  "Input: {input}\nOutput: {output}"
+);
+
+// Create a SemanticSimilarityExampleSelector that will be used to select the examples.
+const exampleSelector = await SemanticSimilarityExampleSelector.fromExamples(
+  [
+    { input: "happy", output: "sad" },
+    { input: "tall", output: "short" },
+    { input: "energetic", output: "lethargic" },
+    { input: "sunny", output: "gloomy" },
+    { input: "windy", output: "calm" },
+  ],
+  new OpenAIEmbeddings(),
+  HNSWLib,
+  { k: 1 }
+);
+
+// Create a FewShotPromptTemplate that will use the example selector.
+const dynamicPrompt = new FewShotPromptTemplate({
+  // We provide an ExampleSelector instead of examples.
+  exampleSelector,
+  examplePrompt,
+  prefix: "Give the antonym of every input",
+  suffix: "Input: {adjective}\nOutput:",
+  inputVariables: ["adjective"],
+});
+
+// Input is about the weather, so it should select e.g. the sunny/gloomy example
+console.log(await dynamicPrompt.format({ adjective: "rainy" }));
+/*
+  Give the antonym of every input
+
+  Input: sunny
+  Output: gloomy
+
+  Input: rainy
+  Output:
+*/
+
+// Input is a measurement, so should select the tall/short example
+console.log(await dynamicPrompt.format({ adjective: "large" }));
+/*
+  Give the antonym of every input
+
+  Input: tall
+  Output: short
+
+  Input: large
+  Output:
+*/
diff --git a/examples/src/prompts/semantic_similarity_example_selector_from_existing.ts b/examples/src/prompts/semantic_similarity_example_selector_from_existing.ts
@@ -0,0 +1,102 @@
+// Ephemeral, in-memory vector store for demo purposes
+import { MemoryVectorStore } from "langchain/vectorstores/memory";
+import {
+  SemanticSimilarityExampleSelector,
+  PromptTemplate,
+  FewShotPromptTemplate,
+} from "langchain/prompts";
+import { OpenAIEmbeddings } from "langchain/embeddings/openai";
+import { ChatOpenAI } from "langchain/chat_models/openai";
+
+const embeddings = new OpenAIEmbeddings();
+
+const memoryVectorStore = new MemoryVectorStore(embeddings);
+
+const examples = [
+  {
+    query: "healthy food",
+    output: `galbi`,
+  },
+  {
+    query: "healthy food",
+    output: `schnitzel`,
+  },
+  {
+    query: "foo",
+    output: `bar`,
+  },
+];
+
+const exampleSelector = new SemanticSimilarityExampleSelector({
+  vectorStore: memoryVectorStore,
+  k: 2,
+  // Only embed the "query" key of each example
+  inputKeys: ["query"],
+});
+
+for (const example of examples) {
+  // Format and add an example to the underlying vector store
+  await exampleSelector.addExample(example);
+}
+
+// Create a prompt template that will be used to format the examples.
+const examplePrompt = PromptTemplate.fromTemplate(`<example>
+  <user_input>
+    {query}
+  </user_input>
+  <output>
+    {output}
+  </output>
+</example>`);
+
+// Create a FewShotPromptTemplate that will use the example selector.
+const dynamicPrompt = new FewShotPromptTemplate({
+  // We provide an ExampleSelector instead of examples.
+  exampleSelector,
+  examplePrompt,
+  prefix: `Answer the user's question, using the below examples as reference:`,
+  suffix: "User question: {query}",
+  inputVariables: ["query"],
+});
+
+const formattedValue = await dynamicPrompt.format({
+  query: "What is a healthy food?",
+});
+console.log(formattedValue);
+
+/*
+Answer the user's question, using the below examples as reference:
+
+<example>
+  <user_input>
+    healthy food
+  </user_input>
+  <output>
+    galbi
+  </output>
+</example>
+
+<example>
+  <user_input>
+    healthy food
+  </user_input>
+  <output>
+    schnitzel
+  </output>
+</example>
+
+User question: What is a healthy food?
+*/
+
+const model = new ChatOpenAI({});
+
+const chain = dynamicPrompt.pipe(model);
+
+const result = await chain.invoke({ query: "What is a healthy food?" });
+console.log(result);
+/*
+  AIMessage {
+    content: 'A healthy food can be galbi or schnitzel.',
+    additional_kwargs: { function_call: undefined }
+  }
+*/
diff --git a/examples/src/prompts/semantic_similarity_example_selector_metadata_filtering.ts b/examples/src/prompts/semantic_similarity_example_selector_metadata_filtering.ts
@@ -0,0 +1,82 @@
+// Ephemeral, in-memory vector store for demo purposes
+import { MemoryVectorStore } from "langchain/vectorstores/memory";
+import {
+  SemanticSimilarityExampleSelector,
+  PromptTemplate,
+  FewShotPromptTemplate,
+} from "langchain/prompts";
+import { OpenAIEmbeddings } from "langchain/embeddings/openai";
+import { ChatOpenAI } from "langchain/chat_models/openai";
+import { Document } from "langchain/document";
+
+const embeddings = new OpenAIEmbeddings();
+
+const memoryVectorStore = new MemoryVectorStore(embeddings);
+
+const examples = [
+  {
+    query: "healthy food",
+    output: `lettuce`,
+    food_type: "vegetable",
+  },
+  {
+    query: "healthy food",
+    output: `schnitzel`,
+    food_type: "veal",
+  },
+  {
+    query: "foo",
+    output: `bar`,
+    food_type: "baz",
+  },
+];
+
+const exampleSelector = new SemanticSimilarityExampleSelector({
+  vectorStore: memoryVectorStore,
+  k: 2,
+  // Only embed the "query" key of each example
+  inputKeys: ["query"],
+  // Filter type will depend on your specific vector store.
+  // See the section of the docs for the specific vector store you are using.
+  filter: (doc: Document) => doc.metadata.food_type === "vegetable",
+});
+
+for (const example of examples) {
+  // Format and add an example to the underlying vector store
+  await exampleSelector.addExample(example);
+}
+
+// Create a prompt template that will be used to format the examples.
+const examplePrompt = PromptTemplate.fromTemplate(`<example>
+  <user_input>
+    {query}
+  </user_input>
+  <output>
+    {output}
+  </output>
+</example>`);
+
+// Create a FewShotPromptTemplate that will use the example selector.
+const dynamicPrompt = new FewShotPromptTemplate({
+  // We provide an ExampleSelector instead of examples.
+  exampleSelector,
+  examplePrompt,
+  prefix: `Answer the user's question, using the below examples as reference:`,
+  suffix: "User question:\n{query}",
+  inputVariables: ["query"],
+});
+
+const model = new ChatOpenAI({});
+
+const chain = dynamicPrompt.pipe(model);
+
+const result = await chain.invoke({
+  query: "What is exactly one type of healthy food?",
+});
+console.log(result);
+/*
+  AIMessage {
+    content: 'One type of healthy food is lettuce.',
+    additional_kwargs: { function_call: undefined }
+  }
+*/