Creating sonix audio transcription document loader integration (langc…

…hain-ai#1917) * Creating sonix audio transcription loader * Rename to match convention, small docs updates --------- Co-authored-by: jacoblee93 <[email protected]>
shcallaway · Jul 12, 2023 · 981d86f · 981d86f
1 parent c70ff59
commit 981d86f
Show file tree

Hide file tree

Showing 10 changed files with 131 additions and 0 deletions.
diff --git a/...les/indexes/document_loaders/examples/web_loaders/sonix_audio_transcription.mdx b/...les/indexes/document_loaders/examples/web_loaders/sonix_audio_transcription.mdx
@@ -0,0 +1,33 @@
+---
+hide_table_of_contents: true
+sidebar_class_name: node-only
+---
+
+# Sonix Audio
+
+:::tip Compatibility
+Only available on Node.js.
+:::
+
+This covers how to load document objects from an audio file using the [Sonix](https://sonix.ai/) API.
+
+## Setup
+
+To run this loader, you will need to create an account on https://sonix.ai/ and obtain an auth key from the https://my.sonix.ai/api page.
+
+You'll also need to install the `sonix-speech-recognition` library:
+
+```bash npm2yarn
+npm install sonix-speech-recognition
+```
+
+## Usage
+
+Once the auth key is configured, you can use the loader to create transcriptions and then convert them into a Document.
+In the `request` parameter, you can either specify a local file by setting `audioFilePath` or a remote file using `audioUrl`.
+You will also need to specify the audio language. See the list of supported languages [here](https://sonix.ai/docs/api#languages).
+
+import CodeBlock from "@theme/CodeBlock";
+import Example from "@examples/document_loaders/sonix_audio_transcription.ts";
+
+<CodeBlock language="typescript">{Example}</CodeBlock>
diff --git a/examples/src/document_loaders/sonix_audio_transcription.ts b/examples/src/document_loaders/sonix_audio_transcription.ts
@@ -0,0 +1,14 @@
+import { SonixAudioTranscriptionLoader } from "langchain/document_loaders/web/sonix_audio_transcription";
+
+// Example: transcribe an audio file with Sonix and load the result as documents.
+// The string values below are placeholders to be replaced with real values.
+const loader = new SonixAudioTranscriptionLoader({
+  sonixAuthKey: "SONIX_AUTH_KEY",
+  request: {
+    // Path to a local audio file; per the accompanying docs, a remote file
+    // can be given via `audioUrl` instead.
+    audioFilePath: "LOCAL_AUDIO_FILE_PATH",
+    fileName: "FILE_NAME",
+    // Language of the audio being transcribed.
+    language: "en",
+  },
+});
+
+// Runs the transcription request and resolves to the resulting documents.
+const docs = await loader.load();
+
+console.log(docs);
diff --git a/langchain/.gitignore b/langchain/.gitignore
@@ -247,6 +247,9 @@ document_loaders/web/notionapi.d.ts
 document_loaders/web/s3.cjs
 document_loaders/web/s3.js
 document_loaders/web/s3.d.ts
+document_loaders/web/sonix_audio_transcription.cjs
+document_loaders/web/sonix_audio_transcription.js
+document_loaders/web/sonix_audio_transcription.d.ts
 document_loaders/web/confluence.cjs
 document_loaders/web/confluence.js
 document_loaders/web/confluence.d.ts

diff --git a/langchain/package.json b/langchain/package.json
@@ -259,6 +259,9 @@
     "document_loaders/web/s3.cjs",
     "document_loaders/web/s3.js",
     "document_loaders/web/s3.d.ts",
+    "document_loaders/web/sonix_audio_transcription.cjs",
+    "document_loaders/web/sonix_audio_transcription.js",
+    "document_loaders/web/sonix_audio_transcription.d.ts",
     "document_loaders/web/confluence.cjs",
     "document_loaders/web/confluence.js",
     "document_loaders/web/confluence.d.ts",
@@ -554,6 +557,7 @@
     "replicate": "^0.9.0",
     "rimraf": "^5.0.1",
     "rollup": "^3.19.1",
+    "sonix-speech-recognition": "^2.1.1",
     "sqlite3": "^5.1.4",
     "srt-parser-2": "^1.2.2",
     "ts-jest": "^29.1.0",
@@ -612,6 +616,7 @@
     "puppeteer": "^19.7.2",
     "redis": "^4.6.4",
     "replicate": "^0.9.0",
+    "sonix-speech-recognition": "^2.1.1",
     "srt-parser-2": "^1.2.2",
     "typeorm": "^0.3.12",
     "typesense": "^1.5.3",
@@ -763,6 +768,9 @@
     "replicate": {
       "optional": true
     },
+    "sonix-speech-recognition": {
+      "optional": true
+    },
     "srt-parser-2": {
       "optional": true
     },
@@ -1248,6 +1256,11 @@
       "import": "./document_loaders/web/s3.js",
       "require": "./document_loaders/web/s3.cjs"
     },
+    "./document_loaders/web/sonix_audio_transcription": {
+      "types": "./document_loaders/web/sonix_audio_transcription.d.ts",
+      "import": "./document_loaders/web/sonix_audio_transcription.js",
+      "require": "./document_loaders/web/sonix_audio_transcription.cjs"
+    },
     "./document_loaders/web/confluence": {
       "types": "./document_loaders/web/confluence.d.ts",
       "import": "./document_loaders/web/confluence.js",

diff --git a/langchain/scripts/create-entrypoints.js b/langchain/scripts/create-entrypoints.js
@@ -104,6 +104,7 @@ const entrypoints = {
   "document_loaders/web/notiondb": "document_loaders/web/notiondb",
   "document_loaders/web/notionapi": "document_loaders/web/notionapi",
   "document_loaders/web/s3": "document_loaders/web/s3",
+  "document_loaders/web/sonix_audio_transcription": "document_loaders/web/sonix_audio_transcription",
   "document_loaders/web/confluence": "document_loaders/web/confluence",
   "document_loaders/web/sort_xyz_blockchain": "document_loaders/web/sort_xyz_blockchain",
   "document_loaders/fs/directory": "document_loaders/fs/directory",
@@ -249,6 +250,7 @@ const requiresOptionalDependency = [
   "document_loaders/web/notiondb",
   "document_loaders/web/notionapi",
   "document_loaders/web/s3",
+  "document_loaders/web/sonix_audio_transcription",
   "document_loaders/web/confluence",
   "document_loaders/fs/directory",
   "document_loaders/fs/buffer",

diff --git a/langchain/src/document_loaders/web/sonix_audio_transcription.ts b/langchain/src/document_loaders/web/sonix_audio_transcription.ts
@@ -0,0 +1,45 @@
+import { SonixSpeechRecognitionService } from "sonix-speech-recognition";
+import { SpeechToTextRequest } from "sonix-speech-recognition/lib/types.js";
+import { Document } from "../../document.js";
+import { BaseDocumentLoader } from "../base.js";
+
+/**
+ * Document loader that transcribes audio through the
+ * `sonix-speech-recognition` client and exposes the transcript as a
+ * single `Document`.
+ */
+export class SonixAudioTranscriptionLoader extends BaseDocumentLoader {
+  /** Sonix client, constructed once from the auth key passed to the constructor. */
+  private readonly sonixSpeechRecognitionService: SonixSpeechRecognitionService;
+
+  /** Transcription request forwarded unchanged to `speechToText` on `load()`. */
+  private readonly speechToTextRequest: SpeechToTextRequest;
+
+  /**
+   * @param sonixAuthKey - Auth key handed to `SonixSpeechRecognitionService`.
+   * @param request - Speech-to-text request sent when `load()` is called;
+   *   its `fileName` is also recorded in the resulting document's metadata.
+   */
+  constructor({
+    sonixAuthKey,
+    request: speechToTextRequest,
+  }: {
+    sonixAuthKey: string;
+    request: SpeechToTextRequest;
+  }) {
+    super();
+    this.sonixSpeechRecognitionService = new SonixSpeechRecognitionService(
+      sonixAuthKey
+    );
+    this.speechToTextRequest = speechToTextRequest;
+  }
+
+  /**
+   * Sends the stored request to Sonix and wraps the returned text in a
+   * `Document`.
+   *
+   * @returns An array holding one document whose `pageContent` is the
+   *   returned text and whose metadata carries the request's `fileName`;
+   *   an empty array when the service reports a `"failed"` status.
+   */
+  async load(): Promise<Document[]> {
+    const { text, status, error } =
+      await this.sonixSpeechRecognitionService.speechToText(
+        this.speechToTextRequest
+      );
+
+    // NOTE(review): a failed transcription is only logged and reported as an
+    // empty result, so callers cannot tell it apart from "no documents".
+    if (status === "failed") {
+      console.error("Error:", error);
+      return [];
+    }
+
+    const document = new Document({
+      pageContent: text,
+      metadata: {
+        fileName: this.speechToTextRequest.fileName,
+      },
+    });
+
+    return [document];
+  }
+}
diff --git a/langchain/src/load/import_constants.ts b/langchain/src/load/import_constants.ts
@@ -57,6 +57,7 @@ export const optionalImportEntrypoints = [
   "langchain/document_loaders/web/notiondb",
   "langchain/document_loaders/web/notionapi",
   "langchain/document_loaders/web/s3",
+  "langchain/document_loaders/web/sonix_audio_transcription",
   "langchain/document_loaders/web/confluence",
   "langchain/document_loaders/fs/directory",
   "langchain/document_loaders/fs/buffer",

diff --git a/langchain/src/load/import_type.d.ts b/langchain/src/load/import_type.d.ts
@@ -169,6 +169,11 @@ export interface OptionalImportMap {
   "langchain/document_loaders/web/s3"?:
     | typeof import("../document_loaders/web/s3.js")
     | Promise<typeof import("../document_loaders/web/s3.js")>;
+  "langchain/document_loaders/web/sonix_audio_transcription"?:
+    | typeof import("../document_loaders/web/sonix_audio_transcription.js")
+    | Promise<
+        typeof import("../document_loaders/web/sonix_audio_transcription.js")
+      >;
   "langchain/document_loaders/web/confluence"?:
     | typeof import("../document_loaders/web/confluence.js")
     | Promise<typeof import("../document_loaders/web/confluence.js")>;

diff --git a/langchain/tsconfig.json b/langchain/tsconfig.json
@@ -111,6 +111,7 @@
       "src/document_loaders/web/notiondb.ts",
       "src/document_loaders/web/notionapi.ts",
       "src/document_loaders/web/s3.ts",
+      "src/document_loaders/web/sonix_audio_transcription.ts",
       "src/document_loaders/web/confluence.ts",
       "src/document_loaders/web/sort_xyz_blockchain.ts",
       "src/document_loaders/fs/directory.ts",

diff --git a/yarn.lock b/yarn.lock
@@ -19662,6 +19662,7 @@ __metadata:
     replicate: ^0.9.0
     rimraf: ^5.0.1
     rollup: ^3.19.1
+    sonix-speech-recognition: ^2.1.1
     sqlite3: ^5.1.4
     srt-parser-2: ^1.2.2
     ts-jest: ^29.1.0
@@ -19723,6 +19724,7 @@ __metadata:
     puppeteer: ^19.7.2
     redis: ^4.6.4
     replicate: ^0.9.0
+    sonix-speech-recognition: ^2.1.1
     srt-parser-2: ^1.2.2
     typeorm: ^0.3.12
     typesense: ^1.5.3
@@ -19825,6 +19827,8 @@ __metadata:
       optional: true
     replicate:
       optional: true
+    sonix-speech-recognition:
+      optional: true
     srt-parser-2:
       optional: true
     typeorm:
@@ -25985,6 +25989,16 @@ __metadata:
   languageName: node
   linkType: hard
 
+"sonix-speech-recognition@npm:^2.1.1":
+  version: 2.1.1
+  resolution: "sonix-speech-recognition@npm:2.1.1"
+  dependencies:
+    axios: ^1.4.0
+    form-data: ^4.0.0
+  checksum: 67ffdf64fc90bab3556fb2a1c327e2d2ee0b3b9492d1f9fe13dd94125f9e1171dce5cc527fa9a08aba3c28d35d5595b16c3e5fca11317321e60720ec233b0d6b
+  languageName: node
+  linkType: hard
+
 "sort-css-media-queries@npm:2.1.0":
   version: 2.1.0
   resolution: "sort-css-media-queries@npm:2.1.0"