Skip to content

Commit

Permalink
Fix refine chain, add docs (langchain-ai#763)
Browse files Browse the repository at this point in the history
* Fix refine chain, add docs

* Update wording

* Fix
  • Loading branch information
nfcampos authored Apr 12, 2023
1 parent b3fe15c commit 2622bf4
Show file tree
Hide file tree
Showing 3 changed files with 64 additions and 7 deletions.
17 changes: 16 additions & 1 deletion docs/docs/modules/chains/index_related_chains/document_qa.mdx
Original file line number Diff line number Diff line change
@@ -1,9 +1,24 @@
---
hide_table_of_contents: true
---

import QAExample from "@examples/chains/question_answering.ts";
import RefineExample from "@examples/chains/qa_refine.ts";
import CodeBlock from "@theme/CodeBlock";

# Document QA Chains

LangChain provides chains used for processing unstructured text data: `StuffDocumentsChain` and `MapReduceDocumentsChain`.
LangChain provides chains used for processing unstructured text data: `StuffDocumentsChain`, `MapReduceDocumentsChain` and `RefineDocumentsChain`.
These chains are the building blocks of more complex chains for processing unstructured text data and receive both documents and a question as input. They then utilize the language model to provide an answer to the question based on the given documents.

- `StuffDocumentsChain`: This chain is the simplest of the 3 chains and simply injects all documents passed in into the prompt. It then returns the answer to the question, using all documents as context. It is suitable for QA tasks over a small number of documents.
- `MapReduceDocumentsChain`: This chain adds a preprocessing step to select relevant portions of each document until the total number of tokens is less than the maximum number of tokens allowed by the model. It then uses the transformed docs as context to answer the question. It is suitable for QA tasks over larger documents, and it runs the preprocessing step in parallel, which can reduce the running time.
- `RefineDocumentsChain`: This chain iterates over the documents one by one to update a running answer, at each turn using the previous version of the answer and the next doc as context. It is suitable for QA tasks over a large number of documents.

## Usage, `StuffDocumentsChain` and `MapReduceDocumentsChain`

<CodeBlock language="typescript">{QAExample}</CodeBlock>

## Usage, `RefineDocumentsChain`

<CodeBlock language="typescript">{RefineExample}</CodeBlock>
36 changes: 36 additions & 0 deletions examples/src/chains/qa_refine.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
import { loadQARefineChain } from "langchain/chains";
import { OpenAI } from "langchain/llms/openai";
import { TextLoader } from "langchain/document_loaders/fs/text";
import { MemoryVectorStore } from "langchain/vectorstores/memory";
import { OpenAIEmbeddings } from "langchain/embeddings/openai";

/**
 * Example: question answering over a text file with a refine QA chain.
 *
 * Steps performed, in order:
 *  1. Build an OpenAI LLM (temperature 0), a refine QA chain on top of it,
 *     and an OpenAI embeddings client.
 *  2. Load `./state_of_the_union.txt`, split it into documents, and index
 *     those documents in an in-memory vector store.
 *  3. Run a similarity search for the question to pick the documents that
 *     are handed to the chain.
 *  4. Call the chain with those documents plus the question and log the
 *     result to the console.
 */
export async function run() {
  // Models first: embeddings for the vector store, LLM for the chain.
  const embedder = new OpenAIEmbeddings();
  const llm = new OpenAI({ temperature: 0 });
  const refineChain = loadQARefineChain(llm);

  // Read and split the source text, then index the pieces in memory.
  const splitDocs = await new TextLoader(
    "./state_of_the_union.txt"
  ).loadAndSplit();
  const vectorStore = await MemoryVectorStore.fromDocuments(
    splitDocs,
    embedder
  );

  // Only the documents returned by the similarity search go to the chain.
  const question = "What did the president say about Justice Breyer";
  const matches = await vectorStore.similaritySearch(question);

  // The chain expects the documents under `input_documents`.
  const answer = await refineChain.call({
    input_documents: matches,
    question,
  });

  console.log(answer);
  /*
  Sample output from one run:

  {
    output_text: '\n' +
      '\n' +
      "The president said that Justice Stephen Breyer has dedicated his life to serve this country and thanked him for his service. He also mentioned that Judge Ketanji Brown Jackson will continue Justice Breyer's legacy of excellence, and that the constitutional right affirmed in Roe v. Wade—standing precedent for half a century—is under attack as never before. He emphasized the importance of protecting access to health care, preserving a woman's right to choose, and advancing maternal health care in America. He also expressed his support for the LGBTQ+ community, and his commitment to protecting their rights, including offering a Unity Agenda for the Nation to beat the opioid epidemic, increase funding for prevention, treatment, harm reduction, and recovery, and strengthen the Violence Against Women Act."
  }
  */
}
18 changes: 12 additions & 6 deletions langchain/src/chains/combine_docs_chain.ts
Original file line number Diff line number Diff line change
Expand Up @@ -278,7 +278,7 @@ export class RefineDocumentsChain
fields.initialResponseName ?? this.initialResponseName;
}

_constructInitialInputs(doc: Document, rest: Record<string, unknown>) {
async _constructInitialInputs(doc: Document, rest: Record<string, unknown>) {
const baseInfo: Record<string, unknown> = {
page_content: doc.pageContent,
...doc.metadata,
Expand All @@ -289,15 +289,15 @@ export class RefineDocumentsChain
});

const baseInputs: Record<string, unknown> = {
[this.documentVariableName]: this.documentPrompt.format({
[this.documentVariableName]: await this.documentPrompt.format({
...documentInfo,
}),
};
const inputs = { ...baseInputs, ...rest };
return inputs;
}

_constructRefineInputs(doc: Document, res: string) {
async _constructRefineInputs(doc: Document, res: string) {
const baseInfo: Record<string, unknown> = {
page_content: doc.pageContent,
...doc.metadata,
Expand All @@ -307,7 +307,7 @@ export class RefineDocumentsChain
documentInfo[value] = baseInfo[value];
});
const baseInputs: Record<string, unknown> = {
[this.documentVariableName]: this.documentPrompt.format({
[this.documentVariableName]: await this.documentPrompt.format({
...documentInfo,
}),
};
Expand All @@ -323,13 +323,19 @@ export class RefineDocumentsChain

const currentDocs = docs as Document[];

const initialInputs = this._constructInitialInputs(currentDocs[0], rest);
const initialInputs = await this._constructInitialInputs(
currentDocs[0],
rest
);
let res = await this.llmChain.predict({ ...initialInputs });

const refineSteps = [res];

for (let i = 1; i < currentDocs.length; i += 1) {
const refineInputs = this._constructRefineInputs(currentDocs[i], res);
const refineInputs = await this._constructRefineInputs(
currentDocs[i],
res
);
const inputs = { ...refineInputs, ...rest };
res = await this.refineLLMChain.predict({ ...inputs });
refineSteps.push(res);
Expand Down

0 comments on commit 2622bf4

Please sign in to comment.