Skip to content

Commit

Permalink
Update chromasdk peer dep, fix issues with using existing collections…
Browse files Browse the repository at this point in the history
…, update docs
  • Loading branch information
nfcampos committed Apr 23, 2023
1 parent 7e21a62 commit 43789f7
Show file tree
Hide file tree
Showing 10 changed files with 163 additions and 120 deletions.
65 changes: 0 additions & 65 deletions docs/docs/modules/indexes/vector_stores/integrations/chroma.md

This file was deleted.

32 changes: 32 additions & 0 deletions docs/docs/modules/indexes/vector_stores/integrations/chroma.mdx
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
import CodeBlock from "@theme/CodeBlock";

# Chroma

Chroma is an open-source embedding database, licensed under Apache 2.0.

## Setup

1. Run Chroma with Docker on your computer (see the [Chroma docs](https://docs.trychroma.com/api-reference)).
2. Install the Chroma JS SDK.

```bash npm2yarn
npm install -S chromadb
```

## Usage, Index and query Documents

import FromDocs from "@examples/indexes/vector_stores/chroma/fromDocs.ts";

<CodeBlock language="typescript">{FromDocs}</CodeBlock>

## Usage, Index and query texts

import FromTexts from "@examples/indexes/vector_stores/chroma/fromTexts.ts";

<CodeBlock language="typescript">{FromTexts}</CodeBlock>

## Usage, Query docs from existing collection

import Search from "@examples/indexes/vector_stores/chroma/search.ts";

<CodeBlock language="typescript">{Search}</CodeBlock>
2 changes: 1 addition & 1 deletion examples/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
"@supabase/supabase-js": "^2.10.0",
"@zilliz/milvus2-sdk-node": "^2.2.0",
"axios": "^0.26.0",
"chromadb": "^1.3.0",
"chromadb": "^1.4.0",
"graphql": "^16.6.0",
"js-yaml": "^4.1.0",
"langchain": "workspace:*",
Expand Down
25 changes: 25 additions & 0 deletions examples/src/indexes/vector_stores/chroma/fromDocs.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
import { Chroma } from "langchain/vectorstores/chroma";
import { OpenAIEmbeddings } from "langchain/embeddings/openai";
import { TextLoader } from "langchain/document_loaders/fs/text";

// Read the example text file and turn it into documents.
const textLoader = new TextLoader(
  "src/document_loaders/example_data/example.txt"
);
const loadedDocs = await textLoader.load();

// Build a Chroma vector store from the loaded documents, using OpenAI
// embeddings and the "a-test-collection" collection.
const embeddings = new OpenAIEmbeddings();
const store = await Chroma.fromDocuments(loadedDocs, embeddings, {
  collectionName: "a-test-collection",
});

// Fetch the single document most similar to the query "hello" and print it.
const matches = await store.similaritySearch("hello", 1);
console.log(matches);
/*
  Sample output:

  [
    Document {
      pageContent: 'Foo\nBar\nBaz\n\n',
      metadata: { source: 'src/document_loaders/example_data/example.txt' }
    }
  ]
*/
36 changes: 36 additions & 0 deletions examples/src/indexes/vector_stores/chroma/fromTexts.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
import { Chroma } from "langchain/vectorstores/chroma";
import { OpenAIEmbeddings } from "langchain/embeddings/openai";

// Text sample from Godel, Escher, Bach.
const passages = [
  `Tortoise: Labyrinth? Labyrinth? Could it Are we in the notorious Little
Harmonic Labyrinth of the dreaded Majotaur?`,
  "Achilles: Yiikes! What is that?",
  `Tortoise: They say-although I person never believed it myself-that an I
Majotaur has created a tiny labyrinth sits in a pit in the middle of
it, waiting innocent victims to get lost in its fears complexity.
Then, when they wander and dazed into the center, he laughs and
laughs at them-so hard, that he laughs them to death!`,
  "Achilles: Oh, no!",
  "Tortoise: But it's only a myth. Courage, Achilles.",
];

// NOTE(review): only three metadata entries are given for five passages.
// The sample output below suggests entries are paired with passages by
// position, leaving the last two passages with empty metadata — confirm.
const passageMetadata = [{ id: 2 }, { id: 1 }, { id: 3 }];

// Build a Chroma vector store from the passages, using OpenAI embeddings
// and the "godel-escher-bach" collection.
const store = await Chroma.fromTexts(
  passages,
  passageMetadata,
  new OpenAIEmbeddings(),
  { collectionName: "godel-escher-bach" }
);

// Fetch the two passages most similar to the query "scared" and print them.
const matches = await store.similaritySearch("scared", 2);
console.log(matches);
/*
  Sample output:

  [
    Document { pageContent: 'Achilles: Oh, no!', metadata: {} },
    Document {
      pageContent: 'Achilles: Yiikes! What is that?',
      metadata: { id: 1 }
    }
  ]
*/
19 changes: 19 additions & 0 deletions examples/src/indexes/vector_stores/chroma/search.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
import { Chroma } from "langchain/vectorstores/chroma";
import { OpenAIEmbeddings } from "langchain/embeddings/openai";

// Open the "godel-escher-bach" collection as a vector store, using OpenAI
// embeddings for queries.
const embeddings = new OpenAIEmbeddings();
const store = await Chroma.fromExistingCollection(embeddings, {
  collectionName: "godel-escher-bach",
});

// Fetch the two documents most similar to the query "scared" and print them.
const matches = await store.similaritySearch("scared", 2);
console.log(matches);
/*
  Sample output:

  [
    Document { pageContent: 'Achilles: Oh, no!', metadata: {} },
    Document {
      pageContent: 'Achilles: Yiikes! What is that?',
      metadata: { id: 1 }
    }
  ]
*/
20 changes: 8 additions & 12 deletions examples/src/indexes/vector_stores/hnswlib_fromdocs.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,13 @@ import { HNSWLib } from "langchain/vectorstores/hnswlib";
import { OpenAIEmbeddings } from "langchain/embeddings/openai";
import { TextLoader } from "langchain/document_loaders/fs/text";

export const run = async () => {
// Create docs with a loader
const loader = new TextLoader(
"src/document_loaders/example_data/example.txt"
);
const docs = await loader.load();
// Create docs with a loader
const loader = new TextLoader("src/document_loaders/example_data/example.txt");
const docs = await loader.load();

// Load the docs into the vector store
const vectorStore = await HNSWLib.fromDocuments(docs, new OpenAIEmbeddings());
// Load the docs into the vector store
const vectorStore = await HNSWLib.fromDocuments(docs, new OpenAIEmbeddings());

// Search for the most similar document
const resultOne = await vectorStore.similaritySearch("hello world", 1);
console.log(resultOne);
};
// Search for the most similar document
const result = await vectorStore.similaritySearch("hello world", 1);
console.log(result);
3 changes: 2 additions & 1 deletion langchain/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -302,6 +302,7 @@
"@typescript-eslint/parser": "^5.51.0",
"axios": "^0.26.0",
"cheerio": "^1.0.0-rc.12",
"chromadb": "^1.4.0",
"cohere-ai": "^5.0.2",
"d3-dsv": "^2.0.0",
"dotenv": "^16.0.3",
Expand Down Expand Up @@ -346,7 +347,7 @@
"@zilliz/milvus2-sdk-node": "^2.2.0",
"axios": "^0.26.0",
"cheerio": "^1.0.0-rc.12",
"chromadb": "^1.3.0",
"chromadb": "^1.4.0",
"cohere-ai": "^5.0.2",
"d3-dsv": "^2.0.0",
"epub2": "^3.0.1",
Expand Down
68 changes: 33 additions & 35 deletions langchain/src/vectorstores/chroma.ts
Original file line number Diff line number Diff line change
@@ -1,20 +1,27 @@
import { v4 as uuidv4 } from "uuid";
import type { ChromaClient as ChromaClientT } from "chromadb";
import type { ChromaClient as ChromaClientT, Collection } from "chromadb";

import { Embeddings } from "../embeddings/base.js";
import { VectorStore } from "./base.js";
import { Document } from "../document.js";

export interface ChromaLibArgs {
url?: string;
numDimensions?: number;
collectionName?: string;
index?: ChromaClientT;
}
export type ChromaLibArgs =
| {
url?: string;
numDimensions?: number;
collectionName?: string;
}
| {
index?: ChromaClientT;
numDimensions?: number;
collectionName?: string;
};

export class Chroma extends VectorStore {
index?: ChromaClientT;

collection?: Collection;

collectionName: string;

numDimensions?: number;
Expand All @@ -23,11 +30,14 @@ export class Chroma extends VectorStore {

constructor(embeddings: Embeddings, args: ChromaLibArgs) {
super(embeddings, args);
this.index = args.index;
this.numDimensions = args.numDimensions;
this.embeddings = embeddings;
this.collectionName = ensureCollectionName(args.collectionName);
this.url = args.url || "http://localhost:8000";
if ("index" in args) {
this.index = args.index;
} else if ("url" in args) {
this.url = args.url || "http://localhost:8000";
}
}

async addDocuments(documents: Document[]): Promise<void> {
Expand All @@ -38,33 +48,26 @@ export class Chroma extends VectorStore {
);
}

async ensureCollection() {
if (!this.index) {
const { ChromaClient } = await Chroma.imports();
this.index = new ChromaClient(this.url);
try {
await this.index.createCollection(this.collectionName);
} catch {
// ignore error
async ensureCollection(): Promise<Collection> {
if (!this.collection) {
if (!this.index) {
const { ChromaClient } = await Chroma.imports();
this.index = new ChromaClient(this.url);
}
this.collection = await this.index.getOrCreateCollection(
this.collectionName
);
}

return this.collection;
}

async addVectors(vectors: number[][], documents: Document[]) {
if (vectors.length === 0) {
return;
}
if (!this.index) {
if (this.numDimensions === undefined) {
this.numDimensions = vectors[0].length;
}
const { ChromaClient } = await Chroma.imports();
this.index = new ChromaClient(this.url);
try {
await this.index.createCollection(this.collectionName);
} catch {
// ignore error
}
if (this.numDimensions === undefined) {
this.numDimensions = vectors[0].length;
}
if (vectors.length !== documents.length) {
throw new Error(`Vectors and metadatas must have the same length`);
Expand All @@ -75,7 +78,7 @@ export class Chroma extends VectorStore {
);
}

const collection = await this.index.getCollection(this.collectionName);
const collection = await this.ensureCollection();
const docstoreSize = await collection.count();
await collection.add(
Array.from({ length: vectors.length }, (_, i) =>
Expand All @@ -88,12 +91,7 @@ export class Chroma extends VectorStore {
}

async similaritySearchVectorWithScore(query: number[], k: number) {
if (!this.index) {
throw new Error(
"Vector store not initialised yet. Try calling `addTexts` first."
);
}
const collection = await this.index.getCollection(this.collectionName);
const collection = await this.ensureCollection();

// similaritySearchVectorWithScore supports one query vector at a time
// chroma supports multiple query vectors at a time
Expand Down
13 changes: 7 additions & 6 deletions yarn.lock
Original file line number Diff line number Diff line change
Expand Up @@ -9858,12 +9858,12 @@ __metadata:
languageName: node
linkType: hard

"chromadb@npm:^1.3.0":
version: 1.3.1
resolution: "chromadb@npm:1.3.1"
"chromadb@npm:^1.4.0":
version: 1.4.1
resolution: "chromadb@npm:1.4.1"
dependencies:
axios: ^0.26.0
checksum: c3d2f766708d8a23f484717fa73fd2c255350652a4706b1b96becb031501648bff243337982190d0cd7d57c706df96a7de1c3ce331385dc452f183ed734957ee
checksum: 2b8e02e86577947e9f32d3faadb614b1a0ce60114d383ba7dd9997b519b1ff8e0b38f1a5c132371636ab07ed53f57b45cde3b454bcaa6710d5468358e1634114
languageName: node
linkType: hard

Expand Down Expand Up @@ -13008,7 +13008,7 @@ __metadata:
"@typescript-eslint/parser": ^5.51.0
"@zilliz/milvus2-sdk-node": ^2.2.0
axios: ^0.26.0
chromadb: ^1.3.0
chromadb: ^1.4.0
dotenv: ^16.0.3
eslint: ^8.33.0
eslint-config-airbnb-base: ^15.0.0
Expand Down Expand Up @@ -17151,6 +17151,7 @@ __metadata:
binary-extensions: ^2.2.0
browser-or-node: ^2.1.1
cheerio: ^1.0.0-rc.12
chromadb: ^1.4.0
cohere-ai: ^5.0.2
d3-dsv: ^2.0.0
dotenv: ^16.0.3
Expand Down Expand Up @@ -17206,7 +17207,7 @@ __metadata:
"@zilliz/milvus2-sdk-node": ^2.2.0
axios: ^0.26.0
cheerio: ^1.0.0-rc.12
chromadb: ^1.3.0
chromadb: ^1.4.0
cohere-ai: ^5.0.2
d3-dsv: ^2.0.0
epub2: ^3.0.1
Expand Down

0 comments on commit 43789f7

Please sign in to comment.