Skip to content

Commit

Permalink
Initial implementation of a Google Cloud Storage (GCS) data store.
Browse files Browse the repository at this point in the history
  • Loading branch information
afirstenberg committed Jun 6, 2023
1 parent 0f38158 commit 58d4950
Show file tree
Hide file tree
Showing 2 changed files with 72 additions and 0 deletions.
5 changes: 5 additions & 0 deletions langchain/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -408,6 +408,7 @@
"@getmetal/metal-sdk": "^4.0.0",
"@getzep/zep-js": "^0.3.1",
"@gomomento/sdk": "^1.23.0",
"@google-cloud/storage": "^6.10.1",
"@huggingface/inference": "^1.5.1",
"@jest/globals": "^29.5.0",
"@opensearch-project/opensearch": "^2.2.0",
Expand Down Expand Up @@ -484,6 +485,7 @@
"@getmetal/metal-sdk": "*",
"@getzep/zep-js": "^0.3.1",
"@gomomento/sdk": "^1.23.0",
"@google-cloud/storage": "^6.10.1",
"@huggingface/inference": "^1.5.1",
"@opensearch-project/opensearch": "*",
"@pinecone-database/pinecone": "*",
Expand Down Expand Up @@ -548,6 +550,9 @@
"@gomomento/sdk": {
"optional": true
},
"@google-cloud/storage": {
"optional": true
},
"@huggingface/inference": {
"optional": true
},
Expand Down
67 changes: 67 additions & 0 deletions langchain/src/docstore/googlecloudstorage.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
import { Storage, File } from "@google-cloud/storage";

import { Document } from "../document.js";
import { Docstore } from "./base.js";

/**
 * Options accepted by the `GoogleCloudStorageDocstore` constructor.
 */
export interface GoogleCloudStorageDocstoreConfiguration {
  /** Name of the GCS bucket in which documents are stored. */
  bucket: string;

  /**
   * String placed in front of every object name before it is looked up
   * or written. Optional; typically something like `"docs/"` so that the
   * objects appear grouped under a folder-like path.
   */
  prefix?: string;
}

/**
 * Docstore that keeps each document as one object in a Google Cloud
 * Storage bucket.
 *
 * The object body holds the document's `pageContent`; the document's
 * `metadata` is stored in the object's custom metadata map (the `metadata`
 * field of the GCS object resource).
 */
export class GoogleCloudStorageDocstore extends Docstore {
  /** Name of the bucket that holds the documents. */
  bucket: string;

  /** Prefix prepended to every object name; empty when none is configured. */
  prefix = "";

  /** GCS client used for all reads and writes. */
  storage: Storage;

  /**
   * @param config - Bucket name and optional object-name prefix.
   */
  constructor(config: GoogleCloudStorageDocstoreConfiguration) {
    super();

    this.bucket = config.bucket;
    this.prefix = config.prefix ?? this.prefix;

    // Constructed without explicit options, so project and credentials are
    // resolved by the client library's own defaults.
    this.storage = new Storage();
  }

  /**
   * Loads the document stored under the given name.
   *
   * @param search - Object name (without the configured prefix).
   * @returns A Document whose `pageContent` is the object body decoded as a
   * string and whose `metadata` is the object's custom metadata map.
   * @throws Whatever the GCS client rejects with, e.g. when the object does
   * not exist.
   */
  async search(search: string): Promise<Document> {
    const file = this.getFile(search);

    // The two requests are independent, so issue them concurrently.
    const [[fileMetadata], [dataBuffer]] = await Promise.all([
      file.getMetadata(),
      file.download(),
    ]);

    return new Document({
      pageContent: dataBuffer.toString(),
      // An object saved without custom metadata has no `metadata` field.
      metadata: fileMetadata?.metadata ?? {},
    });
  }

  /**
   * Stores several documents, keyed by object name. Uploads run
   * concurrently; the returned promise rejects if any upload fails.
   *
   * @param texts - Map from object name (without prefix) to document.
   */
  async add(texts: Record<string, Document>): Promise<void> {
    await Promise.all(
      Object.entries(texts).map(([name, document]) =>
        this.addDocument(name, document)
      )
    );
  }

  /**
   * Writes a single document: uploads `pageContent` as the object body and
   * then records `metadata` as the object's custom metadata.
   *
   * @param name - Object name (without the configured prefix).
   * @param document - Document to store.
   */
  async addDocument(name: string, document: Document): Promise<void> {
    const file = this.getFile(name);
    await file.save(document.pageContent);
    // Custom key/value pairs must be nested under `metadata`; passing them
    // at the top level would address built-in object attributes instead and
    // `search` (which reads the nested map) would not find them.
    // NOTE(review): GCS custom metadata values are expected to be strings —
    // confirm how non-string values in `document.metadata` should be handled.
    await file.setMetadata({ metadata: document.metadata });
  }

  /** Resolves the GCS file handle for `name` with the prefix applied. */
  private getFile(name: string): File {
    return this.storage.bucket(this.bucket).file(this.prefix + name);
  }
}

0 comments on commit 58d4950

Please sign in to comment.