Skip to content

Commit

Permalink
[FIX] s3loader credential interface (langchain-ai#1557)
Browse files Browse the repository at this point in the history
* fix: s3loader credential interface update

* Backwards compatibility fixes, update docs

* Change unit test to integration test

---------

Co-authored-by: jacoblee93 <[email protected]>
  • Loading branch information
QasimRRizvi and jacoblee93 authored Jun 29, 2023
1 parent b0e0035 commit 4a9e4fb
Show file tree
Hide file tree
Showing 4 changed files with 48 additions and 49 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,12 @@ To run this index you'll need to have Unstructured already set up and ready to u

See the docs [here](https://js.langchain.com/docs/modules/indexes/document_loaders/examples/file_loaders/unstructured) for information on how to do that.

You'll also need to install the official AWS SDK:

```bash npm2yarn
npm install @aws-sdk/client-s3
```

## Usage

Once Unstructured is configured, you can use the S3 loader to load files and then convert them into a Document.
Expand Down
6 changes: 4 additions & 2 deletions examples/src/document_loaders/s3.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,10 @@ const loader = new S3Loader({
key: "AccountingOverview.pdf",
s3Config: {
region: "us-east-1",
accessKeyId: "AKIAIOSFODNN7EXAMPLE",
secretAccessKey: "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY",
credentials: {
accessKeyId: "AKIAIOSFODNN7EXAMPLE",
secretAccessKey: "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY",
},
},
unstructuredAPIURL: "http://localhost:8000/general/v0/general",
});
Expand Down
Original file line number Diff line number Diff line change
@@ -1,49 +1,43 @@
/* eslint-disable no-process-env */
/* eslint-disable @typescript-eslint/no-non-null-assertion */
import { test, jest, expect } from "@jest/globals";
import S3Client from "@aws-sdk/client-s3";
import * as fs from "node:fs";
import * as path from "node:path";
import { Readable } from "node:stream";
import { S3Loader } from "../web/s3.js";
import { UnstructuredLoader } from "../fs/unstructured.js";

const fsMock = {
...fs,
mkdtempSync: jest.fn().mockReturnValue("tmp/s3fileloader-12345"),
mkdirSync: jest.fn().mockImplementation(() => {}),
writeFileSync: jest.fn().mockImplementation(() => {}),
writeFileSync: jest.fn().mockImplementation((path, data) => {
console.log(`Writing "${(data as object).toString()}" to ${path}`);
}),
};

// Stand-in for the UnstructuredLoader constructor: each call yields an object
// whose `load()` is itself a mock returning a single placeholder document, so
// the test can assert both the constructor arguments and the loader's output
// without a running Unstructured API.
const UnstructuredLoaderMock = jest.fn().mockImplementation(() => ({
load: jest.fn().mockImplementation(() => ["fake document"]),
}));

// Replace the AWS SDK module with a mock so the test performs no network I/O.
// - `S3Client` instances expose a `send()` that always resolves to a response
//   whose `Body` is a Readable stream.
// - `GetObjectCommand` is a bare mock; its arguments are not inspected here.
jest.mock("@aws-sdk/client-s3", () => ({
S3Client: jest.fn().mockImplementation(() => ({
send: jest.fn().mockImplementation(() =>
Promise.resolve({
Body: new Readable({
read() {
// Emit the whole fake payload in one chunk, then signal end-of-stream.
this.push(Buffer.from("Mock file content"));
this.push(null);
},
}),
})
),
})),
GetObjectCommand: jest.fn(),
}));

test("Test S3 loader", async () => {
if (!S3Client) {
// this is to avoid a linting error. S3Client is mocked above.
}

const loader = new S3Loader({
bucket: "test-bucket-123",
key: "AccountingOverview.pdf",
bucket: process.env.AWS_S3_BUCKET_NAME!,
key: process.env.AWS_S3_KEY!,
unstructuredAPIURL: "http://localhost:8000/general/v0/general",
fs: fsMock as typeof fs,
UnstructuredLoader: UnstructuredLoaderMock as typeof UnstructuredLoader,
s3Config: {
credentials: {
accessKeyId: process.env.AWS_ACCESS_KEY_ID!,
secretAccessKey: process.env.AWS_SECRET_ACCESS_KEY!,
},
region: process.env.AWS_REGION,
},
});

const result = await loader.load();
Expand All @@ -55,7 +49,7 @@ test("Test S3 loader", async () => {
expect(fsMock.mkdirSync).toHaveBeenCalled();
expect(fsMock.writeFileSync).toHaveBeenCalled();
expect(UnstructuredLoaderMock).toHaveBeenCalledWith(
path.join("tmp", "s3fileloader-12345", "AccountingOverview.pdf"),
path.join("tmp", "s3fileloader-12345", "test.txt"),
unstructuredOptions
);
expect(result).toEqual(["fake document"]);
Expand Down
49 changes: 23 additions & 26 deletions langchain/src/document_loaders/web/s3.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,33 +2,44 @@ import * as fsDefault from "node:fs";
import * as path from "node:path";
import * as os from "node:os";
import { Readable } from "node:stream";
import { S3Client, GetObjectCommand, S3ClientConfig } from "@aws-sdk/client-s3";
import { BaseDocumentLoader } from "../base.js";
import { UnstructuredLoader as UnstructuredLoaderDefault } from "../fs/unstructured.js";

/**
 * S3 client configuration accepted by the loader via `S3LoaderParams.s3Config`.
 *
 * This is the AWS SDK's `S3ClientConfig` intersected with two optional
 * top-level credential fields. Both extra fields are deprecated in favour of
 * the `credentials` object.
 */
export type S3Config = S3ClientConfig & {
/** @deprecated Use the credentials object instead */
accessKeyId?: string;
/** @deprecated Use the credentials object instead */
secretAccessKey?: string;
};

export interface S3LoaderParams {
bucket: string;
key: string;
unstructuredAPIURL: string;
s3Config?: S3Config;

s3Config?: S3Config & {
/** @deprecated Use the credentials object instead */
accessKeyId?: string;
/** @deprecated Use the credentials object instead */
secretAccessKey?: string;
};
fs?: typeof fsDefault;
UnstructuredLoader?: typeof UnstructuredLoaderDefault;
}

/**
 * Flat S3 connection settings: an optional region plus an optional
 * access-key pair supplied as top-level fields.
 */
interface S3Config {
region?: string;
accessKeyId?: string;
secretAccessKey?: string;
}

export class S3Loader extends BaseDocumentLoader {
private bucket: string;

private key: string;

private unstructuredAPIURL: string;

private s3Config: S3Config;
private s3Config: S3Config & {
/** @deprecated Use the credentials object instead */
accessKeyId?: string;
/** @deprecated Use the credentials object instead */
secretAccessKey?: string;
};

private _fs: typeof fsDefault;

Expand All @@ -52,8 +63,6 @@ export class S3Loader extends BaseDocumentLoader {
}

public async load() {
const { S3Client, GetObjectCommand } = await S3LoaderImports();

const tempDir = this._fs.mkdtempSync(
path.join(os.tmpdir(), "s3fileloader-")
);
Expand Down Expand Up @@ -86,9 +95,10 @@ export class S3Loader extends BaseDocumentLoader {
this._fs.mkdirSync(path.dirname(filePath), { recursive: true });

this._fs.writeFileSync(filePath, objectData);
} catch {
// eslint-disable-next-line @typescript-eslint/no-explicit-any
} catch (e: any) {
throw new Error(
`Failed to download file ${this.key} from S3 bucket ${this.bucket}.`
`Failed to download file ${this.key} from S3 bucket ${this.bucket}: ${e.message}`
);
}

Expand All @@ -109,16 +119,3 @@ export class S3Loader extends BaseDocumentLoader {
}
}
}

/**
 * Dynamically imports the `@aws-sdk/client-s3` package.
 *
 * The import happens at call time rather than at module load, so a missing
 * package surfaces as an actionable error from this function instead of an
 * unresolved-module failure when the file is first evaluated.
 *
 * @returns The module namespace of `@aws-sdk/client-s3`.
 * @throws Error with installation instructions if the package cannot be
 *   loaded. The underlying failure is logged via `console.error` first.
 */
async function S3LoaderImports() {
  try {
    // `await` is required here: without it a rejected import would escape
    // this try/catch and bypass the friendlier error below.
    return await import("@aws-sdk/client-s3");
  } catch (e) {
    console.error(e);
    throw new Error(
      "Failed to load `@aws-sdk/client-s3`. Please install it, e.g. `yarn add @aws-sdk/client-s3`."
    );
  }
}

0 comments on commit 4a9e4fb

Please sign in to comment.