Skip to content

Commit

Permalink
typesense: add vector_distance as score (langchain-ai#1725)
Browse files Browse the repository at this point in the history
Typesense search provides the vector_distance in the search result:

https://typesense.org/docs/0.24.1/api/vector-search.html#nearest-neighbor-vector-search
  • Loading branch information
jaclar authored Jul 4, 2023
1 parent 90d7ede commit f4b1464
Show file tree
Hide file tree
Showing 2 changed files with 58 additions and 37 deletions.
54 changes: 33 additions & 21 deletions langchain/src/vectorstores/tests/typesense.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -75,40 +75,52 @@ test("typesenseRecordsToDocuments should return the correct langchain documents"

const typesenseRecords = [
{
text: "hello world",
foo: "fooo",
bar: "barr",
baz: "bazz",
vec: await embeddings.embedQuery("hello world"),
},
{
text: "hello world 2",
foo: "foooo",
bar: "barrr",
baz: "bazzz",
vec: await embeddings.embedQuery("hello world 2"),
},
];

const expected = [
{
metadata: {
document: {
text: "hello world",
foo: "fooo",
bar: "barr",
baz: "bazz",
vec: await embeddings.embedQuery("hello world"),
},
pageContent: "hello world",
vector_distance: 0.2342145,
},
{
metadata: {
document: {
text: "hello world 2",
foo: "foooo",
bar: "barrr",
baz: "bazzz",
vec: await embeddings.embedQuery("hello world 2"),
},
pageContent: "hello world 2",
vector_distance: 0.4521355,
},
];

const expected = [
[
{
metadata: {
foo: "fooo",
bar: "barr",
baz: "bazz",
},
pageContent: "hello world",
},
0.2342145,
],
[
{
metadata: {
foo: "foooo",
bar: "barrr",
baz: "bazzz",
},
pageContent: "hello world 2",
},
0.4521355,
],
];

expect(vectorstore._typesenseRecordsToDocuments(typesenseRecords)).toEqual(
expected
);
Expand Down
41 changes: 25 additions & 16 deletions langchain/src/vectorstores/typesense.ts
Original file line number Diff line number Diff line change
@@ -1,10 +1,19 @@
import type { Client } from "typesense";
import type { MultiSearchRequestSchema } from "typesense/lib/Typesense/MultiSearch.js";
import type {
SearchResponseHit,
DocumentSchema,
} from "typesense/lib/Typesense/Documents.js";
import type { Document } from "../document.js";
import { Embeddings } from "../embeddings/base.js";
import { VectorStore } from "./base.js";
import { AsyncCaller, AsyncCallerParams } from "../util/async_caller.js";

/**
 * A Typesense search hit as seen by this vector store: the library's
 * `SearchResponseHit` plus the `vector_distance` field that Typesense
 * includes in vector-search results.
 */
interface VectorSearchResponseHit<TDoc extends DocumentSchema>
  extends SearchResponseHit<TDoc> {
  /** Distance reported for this hit; optional because it may be absent. */
  vector_distance?: number;
}

/**
* Typesense vector store configuration.
*/
Expand Down Expand Up @@ -156,21 +165,23 @@ export class Typesense extends VectorStore {
* @returns documents, each paired with the vector distance of its hit
*/
_typesenseRecordsToDocuments(
typesenseRecords: Record<string, unknown>[] | undefined
): Document[] {
const documents =
typesenseRecords:
| { document?: Record<string, unknown>; vector_distance: number }[]
| undefined
): [Document, number][] {
const documents: [Document, number][] =
typesenseRecords?.map((hit) => {
const objectWithMetadatas: Record<string, unknown> = {};

const hitDoc = hit.document || {};
this.metadataColumnNames.forEach((metadataColumnName) => {
objectWithMetadatas[metadataColumnName] = hit[metadataColumnName];
objectWithMetadatas[metadataColumnName] = hitDoc[metadataColumnName];
});

const document: Document = {
pageContent: (hit[this.pageContentColumnName] as string) || "",
pageContent: (hitDoc[this.pageContentColumnName] as string) || "",
metadata: objectWithMetadatas,
};
return document;
return [document, hit.vector_distance];
}) || [];

return documents;
Expand Down Expand Up @@ -202,8 +213,6 @@ export class Typesense extends VectorStore {

/**
* Search for similar documents, returning each one with its Typesense vector distance as the score.
* All the documents have 0 as similarity score because Typesense API
* does not return the similarity score.
* @param vectorPrompt vector to search for
* @param k amount of results to return
* @returns similar documents, each paired with its vector distance as the score
Expand All @@ -230,15 +239,15 @@ export class Typesense extends VectorStore {
{}
);
const results = typesenseResponse.results[0].hits;
const hits = results?.map((hit) => hit.document) as
| Record<string, unknown>[]
| undefined;

const documents = this._typesenseRecordsToDocuments(hits).map(
(doc) => [doc, 0] as [Document<Record<string, unknown>>, number]
);
// Normalise each raw hit into the { document, vector_distance } shape that
// _typesenseRecordsToDocuments expects.
// `??` (not `||`) is deliberate: a vector_distance of 0 is a valid value
// (the hit matches the query vector exactly) and must be preserved. Only a
// missing distance falls back to 2.
// NOTE(review): 2 is presumably the largest possible cosine distance, i.e.
// "least similar" — confirm against the Typesense docs.
const hits = results?.map((hit: VectorSearchResponseHit<object>) => ({
  document: hit?.document ?? {},
  vector_distance: hit?.vector_distance ?? 2,
})) as
  | { document: Record<string, unknown>; vector_distance: number }[]
  | undefined;

return documents;
return this._typesenseRecordsToDocuments(hits);
}

/**
Expand Down

0 comments on commit f4b1464

Please sign in to comment.