forked from langchain-ai/langchainjs
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
langchain[patch]: Add possibility to rerank retrieved docs in ParentD…
…ocumentRetriever and MultiQueryRetriever (langchain-ai#4738) * feat: add Document Compressor to chain to allow rerank * feat: add example * fix: typo * feat: rerank child documents instead of parents one * feat: improve example * feat: add compressor to Multi Query Retreiver * feat: remove example * feat: remove example * fix: missing docs * feat: remove default value of threshold score to adjust to new Cohere models better * feat: make filtering optional and configurable * docs: add examples * fix: type checking so it allow for build * Fix lint --------- Co-authored-by: jacoblee93 <[email protected]>
- Loading branch information
1 parent
c35fd25
commit 6407078
Showing
4 changed files
with
157 additions
and
10 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
93 changes: 93 additions & 0 deletions
93
examples/src/retrievers/parent_document_retriever_rerank.ts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,93 @@ | ||
import { OpenAIEmbeddings } from "@langchain/openai"; | ||
import { CohereRerank } from "@langchain/cohere"; | ||
import { HNSWLib } from "@langchain/community/vectorstores/hnswlib"; | ||
import { InMemoryStore } from "langchain/storage/in_memory"; | ||
import { | ||
ParentDocumentRetriever, | ||
type SubDocs, | ||
} from "langchain/retrievers/parent_document"; | ||
import { RecursiveCharacterTextSplitter } from "langchain/text_splitter"; | ||
|
||
// Initialize the Cohere reranker. Remember to add COHERE_API_KEY to your .env | ||
// topN is 50, the same value as the retriever's childK further down; presumably this lets | ||
// every retrieved child chunk be scored — confirm against the CohereRerank documentation. | ||
const reranker = new CohereRerank({ | ||
topN: 50, | ||
model: "rerank-multilingual-v2.0", | ||
}); | ||
|
||
/**
 * Builds a filtering function that keeps only the documents whose
 * `metadata.relevanceScore` is at least the configured minimum.
 *
 * Documents that carry no relevance score at all (`undefined` / `null`) are
 * treated as fully relevant (score `1`) and are therefore always kept. A
 * genuine score of `0` is respected and compared against the threshold.
 *
 * @param options.relevanceScore - Minimum score a document needs to be kept.
 *   When omitted (or `0`), no filtering is applied and the input is returned
 *   unchanged.
 * @returns A function that takes the documents and returns the filtered list.
 */
export function documentCompressorFiltering({
  relevanceScore,
}: { relevanceScore?: number } = {}) {
  return (docs: SubDocs) => {
    // A missing or zero threshold disables filtering entirely.
    if (!relevanceScore) {
      return docs;
    }

    return docs.filter(
      // `??` (not `||`) so that a real score of 0 is not mistaken for "no score".
      (doc) => (doc?.metadata?.relevanceScore ?? 1) >= relevanceScore
    );
  };
}
|
||
// Splitter used to turn the raw example strings into documents (chunkSize 500, no overlap). | ||
const splitter = new RecursiveCharacterTextSplitter({ | ||
chunkSize: 500, | ||
chunkOverlap: 0, | ||
}); | ||
|
||
// One set of documents per person; both sets are added to the retriever further down. | ||
const jimDocs = await splitter.createDocuments([`Jim favorite color is blue.`]); | ||
|
||
const pamDocs = await splitter.createDocuments([`Pam favorite color is red.`]); | ||
|
||
// The vector store is created with no initial documents; the docstore is an in-memory store. | ||
const vectorstore = await HNSWLib.fromDocuments([], new OpenAIEmbeddings()); | ||
const docstore = new InMemoryStore(); | ||
|
||
// Parent document retriever wired to the vector store and docstore above, with a reranker attached. | ||
const retriever = new ParentDocumentRetriever({ | ||
vectorstore, | ||
docstore, | ||
// Very small chunks for demo purposes. | ||
// Use a bigger chunk size for serious use-cases. | ||
childSplitter: new RecursiveCharacterTextSplitter({ | ||
chunkSize: 10, | ||
chunkOverlap: 0, | ||
}), | ||
// NOTE(review): childK / parentK presumably cap the number of child chunks fetched and | ||
// parent documents returned — confirm against the ParentDocumentRetriever documentation. | ||
childK: 50, | ||
parentK: 5, | ||
// Attach the Cohere reranker and filter its output with a minimum relevance score of 0.3 | ||
documentCompressor: reranker, | ||
documentCompressorFilteringFn: documentCompressorFiltering({ | ||
relevanceScore: 0.3, | ||
}), | ||
}); | ||
|
||
// Index both document sets through the retriever. | ||
const docs = jimDocs.concat(pamDocs); | ||
await retriever.addDocuments(docs); | ||
|
||
// Query the retriever. The returned documents are intended to be already reranked, sorted, | ||
// and filtered by the minimum relevance score configured above, ready to hand to an LLM. | ||
const retrievedDocs = await retriever.getRelevantDocuments( | ||
"What is Pam's favorite color?" | ||
); | ||
|
||
// The document about Pam's favorite color is expected to come first (see sample output below). | ||
console.log(JSON.stringify(retrievedDocs, null, 2)); | ||
/* | ||
[ | ||
{ | ||
"pageContent": "Pam favorite color is red.", | ||
"metadata": { | ||
"relevanceScore": 0.9, | ||
"loc": { | ||
"lines": { | ||
"from": 1, | ||
"to": 1 | ||
} | ||
} | ||
} | ||
} | ||
] | ||
*/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters