Skip to content

Commit

Permalink
Add Vespa retriever (langchain-ai#1087)
Browse files Browse the repository at this point in the history
* Add Vespa retriever

* Small fixes and code review changes

* Fix formatting issues in examples

* Fix docs

* Add port to allow connecting to local instances, fix typos in docs

* Just use the raw URL for VespaRetriever

---------

Co-authored-by: Tat Dat Duong <[email protected]>
Co-authored-by: jacoblee93 <[email protected]>
  • Loading branch information
3 people authored Jun 1, 2023
1 parent 8d774e0 commit d772368
Show file tree
Hide file tree
Showing 14 changed files with 148 additions and 0 deletions.
27 changes: 27 additions & 0 deletions docs/docs/modules/indexes/retrievers/vespa-retriever.mdx
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Vespa Retriever

This shows how to use Vespa.ai as a LangChain retriever.
Vespa.ai is a platform for highly efficient structured text and vector search.
Please refer to [Vespa.ai](https://vespa.ai) for more information.

The following sets up a retriever that fetches results from Vespa's documentation search:

import CodeBlock from "@theme/CodeBlock";
import Example from "@examples/retrievers/vespa.ts";

<CodeBlock language="typescript">{Example}</CodeBlock>

Here, up to 5 results are retrieved from the `content` field in the `paragraph` document type,
using `documentation` as the ranking method. The `userQuery()` is replaced with the actual query
passed from LangChain.

Please refer to the [pyvespa documentation](https://pyvespa.readthedocs.io/en/latest/getting-started-pyvespa.html#Query)
for more information.

The URL is the endpoint of the Vespa application.
You can connect to any Vespa endpoint, either a remote service or a local instance using Docker.
However, most Vespa Cloud instances are protected with mTLS.
If this applies to you, you can, for instance, set up a [Cloudflare Worker](https://cloud.vespa.ai/en/security/cloudflare-workers)
that contains the necessary credentials to connect to the instance.

Now you can return the results and continue using them in LangChain.
22 changes: 22 additions & 0 deletions examples/src/retrievers/vespa.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
import { VespaRetriever } from "langchain/retrievers/vespa";

/**
 * Example entry point: builds a `VespaRetriever` pointed at Vespa's public
 * documentation search, runs one query through it, and logs the documents
 * that come back.
 */
export const run = async () => {
  const retriever = new VespaRetriever({
    url: "https://doc-search.vespa.oath.cloud",
    auth: false,
    // Query settings handed to the retriever as `query_body`.
    query_body: {
      yql: "select content from paragraph where userQuery()",
      hits: 5,
      ranking: "documentation",
      locale: "en-us",
    },
    // Name of the field the content resides in.
    content_field: "content",
  });

  const documents = await retriever.getRelevantDocuments("what is vespa?");
  console.log(documents);
};
3 changes: 3 additions & 0 deletions langchain/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -304,6 +304,9 @@ retrievers/hyde.d.ts
retrievers/self_query.cjs
retrievers/self_query.js
retrievers/self_query.d.ts
retrievers/vespa.cjs
retrievers/vespa.js
retrievers/vespa.d.ts
cache.cjs
cache.js
cache.d.ts
Expand Down
8 changes: 8 additions & 0 deletions langchain/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -316,6 +316,9 @@
"retrievers/self_query.cjs",
"retrievers/self_query.js",
"retrievers/self_query.d.ts",
"retrievers/vespa.cjs",
"retrievers/vespa.js",
"retrievers/vespa.d.ts",
"cache.cjs",
"cache.js",
"cache.d.ts",
Expand Down Expand Up @@ -1195,6 +1198,11 @@
"import": "./retrievers/self_query.js",
"require": "./retrievers/self_query.cjs"
},
"./retrievers/vespa": {
"types": "./retrievers/vespa.d.ts",
"import": "./retrievers/vespa.js",
"require": "./retrievers/vespa.cjs"
},
"./cache": {
"types": "./cache.d.ts",
"import": "./cache.js",
Expand Down
1 change: 1 addition & 0 deletions langchain/scripts/create-entrypoints.js
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,7 @@ const entrypoints = {
"retrievers/document_compressors/chain_extract",
"retrievers/hyde": "retrievers/hyde",
"retrievers/self_query": "retrievers/self_query/index",
"retrievers/vespa": "retrievers/vespa",
// cache
cache: "cache/index",
"cache/momento": "cache/momento",
Expand Down
27 changes: 27 additions & 0 deletions langchain/src/retrievers/tests/vespa.int.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
/* eslint-disable no-process-env */
/* eslint-disable @typescript-eslint/no-non-null-assertion */
import { test, expect } from "@jest/globals";

import { VespaRetriever } from "../vespa.js";

// Integration test: reads the Vespa endpoint from the VESPA_URL environment
// variable and expects the query below to return at least one document.
// NOTE(review): the YQL targets a `music` schema with an `album` field —
// presumably a Vespa sample application; confirm against the test setup.
test("VespaRetriever", async () => {
  const retriever = new VespaRetriever({
    url: process.env.VESPA_URL!,
    auth: false,
    query_body: {
      yql: "select * from music where album contains 'head';",
      hits: 5,
      locale: "en-us",
    },
    content_field: "album",
  });

  const docs = await retriever.getRelevantDocuments("what is vespa?");
  expect(docs.length).toBeGreaterThan(0);

  console.log(docs);
});
53 changes: 53 additions & 0 deletions langchain/src/retrievers/vespa.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
import { Document } from "../document.js";
import {
RemoteRetriever,
RemoteRetrieverValues,
RemoteRetrieverParams,
} from "./remote/base.js";

/**
 * Constructor options for `VespaRetriever`: everything accepted by
 * `RemoteRetrieverParams` plus the two Vespa-specific fields below.
 */
export interface VespaRetrieverParams extends RemoteRetrieverParams {
/**
* The body of the query to send to Vespa (for example `yql`, `hits`,
* `ranking`). `VespaRetriever` spreads these entries into each request
* body and adds the search text under the `query` key.
*/
query_body: object;
/**
* The name of the field the content resides in. The value of this field
* on each returned hit becomes the `pageContent` of the resulting document.
*/
content_field: string;
}

/**
 * Retriever that sends a query to a Vespa application's `/search/` endpoint
 * and converts the returned hits into `Document` instances.
 */
export class VespaRetriever extends RemoteRetriever {
  /** Static part of the request body; merged with the query text on each call. */
  query_body: object;

  /** Name of the hit field whose value is used as `pageContent`. */
  content_field: string;

  /**
   * @param params - `query_body` and `content_field` are kept on the instance;
   *   every other option is forwarded unchanged to `RemoteRetriever`.
   */
  constructor({ query_body, content_field, ...rest }: VespaRetrieverParams) {
    super(rest);
    this.query_body = query_body;
    this.content_field = content_field;

    // Strip trailing slashes first so a base URL such as "https://host/" does
    // not end up as "https://host//search/?".
    this.url = `${this.url.replace(/\/+$/, "")}/search/?`;
  }

  /**
   * Builds the request body for one search.
   *
   * @param query - The search text passed in by the caller.
   * @returns The configured `query_body` entries plus a `query` key. `query`
   *   is written last, so it overrides any `query` entry in `query_body`.
   */
  createJsonBody(query: string): RemoteRetrieverValues {
    return {
      ...this.query_body,
      query,
    };
  }

  /**
   * Converts a Vespa search response into documents.
   *
   * @param json - Parsed response; hits are read from `root.children`.
   * @returns One document per hit, with `pageContent` taken from the
   *   configured content field and the hit `id` stored in `metadata`.
   *   Returns an empty array when the response contains no hits.
   */
  processJsonResponse(json: RemoteRetrieverValues): Document[] {
    // Vespa omits `root.children` when nothing matched, so fall back to an
    // empty list instead of calling `.map` on undefined.
    const hits = json.root?.children ?? [];
    return hits.map(
      (doc: {
        id: string;
        relevance: number;
        source: string;
        fields: Record<string, unknown>;
      }) =>
        new Document({
          pageContent: doc.fields[this.content_field] as string,
          metadata: { id: doc.id },
        })
    );
  }
}
1 change: 1 addition & 0 deletions langchain/tsconfig.json
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,7 @@
"src/retrievers/document_compressors/chain_extract.ts",
"src/retrievers/hyde.ts",
"src/retrievers/self_query/index.ts",
"src/retrievers/vespa.ts",
"src/cache/index.ts",
"src/cache/momento.ts",
"src/cache/redis.ts",
Expand Down
1 change: 1 addition & 0 deletions test-exports-cf/src/entrypoints.js
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ export * from "langchain/retrievers/document_compressors";
export * from "langchain/retrievers/time_weighted";
export * from "langchain/retrievers/document_compressors/chain_extract";
export * from "langchain/retrievers/hyde";
export * from "langchain/retrievers/vespa";
export * from "langchain/cache";
export * from "langchain/stores/file/in_memory";
export * from "langchain/experimental/autogpt";
Expand Down
1 change: 1 addition & 0 deletions test-exports-cjs/src/entrypoints.js
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ const retrievers_document_compressors = require("langchain/retrievers/document_c
const retrievers_time_weighted = require("langchain/retrievers/time_weighted");
const retrievers_document_compressors_chain_extract = require("langchain/retrievers/document_compressors/chain_extract");
const retrievers_hyde = require("langchain/retrievers/hyde");
const retrievers_vespa = require("langchain/retrievers/vespa");
const cache = require("langchain/cache");
const stores_file_in_memory = require("langchain/stores/file/in_memory");
const experimental_autogpt = require("langchain/experimental/autogpt");
Expand Down
1 change: 1 addition & 0 deletions test-exports-cra/src/entrypoints.js
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ export * from "langchain/retrievers/document_compressors";
export * from "langchain/retrievers/time_weighted";
export * from "langchain/retrievers/document_compressors/chain_extract";
export * from "langchain/retrievers/hyde";
export * from "langchain/retrievers/vespa";
export * from "langchain/cache";
export * from "langchain/stores/file/in_memory";
export * from "langchain/experimental/autogpt";
Expand Down
1 change: 1 addition & 0 deletions test-exports-esm/src/entrypoints.js
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ import * as retrievers_document_compressors from "langchain/retrievers/document_
import * as retrievers_time_weighted from "langchain/retrievers/time_weighted";
import * as retrievers_document_compressors_chain_extract from "langchain/retrievers/document_compressors/chain_extract";
import * as retrievers_hyde from "langchain/retrievers/hyde";
import * as retrievers_vespa from "langchain/retrievers/vespa";
import * as cache from "langchain/cache";
import * as stores_file_in_memory from "langchain/stores/file/in_memory";
import * as experimental_autogpt from "langchain/experimental/autogpt";
Expand Down
1 change: 1 addition & 0 deletions test-exports-vercel/src/entrypoints.js
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ export * from "langchain/retrievers/document_compressors";
export * from "langchain/retrievers/time_weighted";
export * from "langchain/retrievers/document_compressors/chain_extract";
export * from "langchain/retrievers/hyde";
export * from "langchain/retrievers/vespa";
export * from "langchain/cache";
export * from "langchain/stores/file/in_memory";
export * from "langchain/experimental/autogpt";
Expand Down
1 change: 1 addition & 0 deletions test-exports-vite/src/entrypoints.js
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ export * from "langchain/retrievers/document_compressors";
export * from "langchain/retrievers/time_weighted";
export * from "langchain/retrievers/document_compressors/chain_extract";
export * from "langchain/retrievers/hyde";
export * from "langchain/retrievers/vespa";
export * from "langchain/cache";
export * from "langchain/stores/file/in_memory";
export * from "langchain/experimental/autogpt";
Expand Down

0 comments on commit d772368

Please sign in to comment.