Skip to content

Commit

Permalink
Creating sonix audio transcription document loader integration (langc…
Browse files Browse the repository at this point in the history
…hain-ai#1917)

* Creating sonix audio transcription loader

* Rename to match convention, small docs updates

---------

Co-authored-by: jacoblee93 <[email protected]>
  • Loading branch information
konstantinov-raft and jacoblee93 authored Jul 12, 2023
1 parent c70ff59 commit 981d86f
Show file tree
Hide file tree
Showing 10 changed files with 131 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
---
hide_table_of_contents: true
sidebar_class_name: node-only
---

# Sonix Audio

:::tip Compatibility
Only available on Node.js.
:::

This covers how to load document objects from an audio file using the [Sonix](https://sonix.ai/) API.

## Setup

To run this loader, you will need to create an account at https://sonix.ai/ and obtain an auth key from the https://my.sonix.ai/api page.

You'll also need to install the `sonix-speech-recognition` library:

```bash npm2yarn
npm install sonix-speech-recognition
```

## Usage

Once the auth key is configured, you can use the loader to create a transcription and then convert it into a Document.
In the `request` parameter, you can either specify a local file by setting `audioFilePath` or a remote file using `audioUrl`.
You will also need to specify the audio language. See the list of supported languages [here](https://sonix.ai/docs/api#languages).

import CodeBlock from "@theme/CodeBlock";
import Example from "@examples/document_loaders/sonix_audio_transcription.ts";

<CodeBlock language="typescript">{Example}</CodeBlock>
14 changes: 14 additions & 0 deletions examples/src/document_loaders/sonix_audio_transcription.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
import { SonixAudioTranscriptionLoader } from "langchain/document_loaders/web/sonix_audio_transcription";

// Replace the placeholder strings below with your own Sonix auth key,
// the path of the audio file to transcribe, and a name for that file.
const sonixLoader = new SonixAudioTranscriptionLoader({
  sonixAuthKey: "SONIX_AUTH_KEY",
  request: {
    audioFilePath: "LOCAL_AUDIO_FILE_PATH",
    fileName: "FILE_NAME",
    language: "en",
  },
});

// Run the transcription and print the resulting documents.
const transcribedDocs = await sonixLoader.load();

console.log(transcribedDocs);
3 changes: 3 additions & 0 deletions langchain/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,9 @@ document_loaders/web/notionapi.d.ts
document_loaders/web/s3.cjs
document_loaders/web/s3.js
document_loaders/web/s3.d.ts
document_loaders/web/sonix_audio_transcription.cjs
document_loaders/web/sonix_audio_transcription.js
document_loaders/web/sonix_audio_transcription.d.ts
document_loaders/web/confluence.cjs
document_loaders/web/confluence.js
document_loaders/web/confluence.d.ts
Expand Down
13 changes: 13 additions & 0 deletions langchain/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -259,6 +259,9 @@
"document_loaders/web/s3.cjs",
"document_loaders/web/s3.js",
"document_loaders/web/s3.d.ts",
"document_loaders/web/sonix_audio_transcription.cjs",
"document_loaders/web/sonix_audio_transcription.js",
"document_loaders/web/sonix_audio_transcription.d.ts",
"document_loaders/web/confluence.cjs",
"document_loaders/web/confluence.js",
"document_loaders/web/confluence.d.ts",
Expand Down Expand Up @@ -554,6 +557,7 @@
"replicate": "^0.9.0",
"rimraf": "^5.0.1",
"rollup": "^3.19.1",
"sonix-speech-recognition": "^2.1.1",
"sqlite3": "^5.1.4",
"srt-parser-2": "^1.2.2",
"ts-jest": "^29.1.0",
Expand Down Expand Up @@ -612,6 +616,7 @@
"puppeteer": "^19.7.2",
"redis": "^4.6.4",
"replicate": "^0.9.0",
"sonix-speech-recognition": "^2.1.1",
"srt-parser-2": "^1.2.2",
"typeorm": "^0.3.12",
"typesense": "^1.5.3",
Expand Down Expand Up @@ -763,6 +768,9 @@
"replicate": {
"optional": true
},
"sonix-speech-recognition": {
"optional": true
},
"srt-parser-2": {
"optional": true
},
Expand Down Expand Up @@ -1248,6 +1256,11 @@
"import": "./document_loaders/web/s3.js",
"require": "./document_loaders/web/s3.cjs"
},
"./document_loaders/web/sonix_audio_transcription": {
"types": "./document_loaders/web/sonix_audio_transcription.d.ts",
"import": "./document_loaders/web/sonix_audio_transcription.js",
"require": "./document_loaders/web/sonix_audio_transcription.cjs"
},
"./document_loaders/web/confluence": {
"types": "./document_loaders/web/confluence.d.ts",
"import": "./document_loaders/web/confluence.js",
Expand Down
2 changes: 2 additions & 0 deletions langchain/scripts/create-entrypoints.js
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@ const entrypoints = {
"document_loaders/web/notiondb": "document_loaders/web/notiondb",
"document_loaders/web/notionapi": "document_loaders/web/notionapi",
"document_loaders/web/s3": "document_loaders/web/s3",
"document_loaders/web/sonix_audio_transcription": "document_loaders/web/sonix_audio_transcription",
"document_loaders/web/confluence": "document_loaders/web/confluence",
"document_loaders/web/sort_xyz_blockchain": "document_loaders/web/sort_xyz_blockchain",
"document_loaders/fs/directory": "document_loaders/fs/directory",
Expand Down Expand Up @@ -249,6 +250,7 @@ const requiresOptionalDependency = [
"document_loaders/web/notiondb",
"document_loaders/web/notionapi",
"document_loaders/web/s3",
"document_loaders/web/sonix_audio_transcription",
"document_loaders/web/confluence",
"document_loaders/fs/directory",
"document_loaders/fs/buffer",
Expand Down
45 changes: 45 additions & 0 deletions langchain/src/document_loaders/web/sonix_audio_transcription.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
import { SonixSpeechRecognitionService } from "sonix-speech-recognition";
import { SpeechToTextRequest } from "sonix-speech-recognition/lib/types.js";
import { Document } from "../../document.js";
import { BaseDocumentLoader } from "../base.js";

/**
 * Document loader that sends a speech-to-text request to the Sonix speech
 * recognition service and wraps the returned transcript in a single
 * `Document`.
 *
 * @example
 * ```typescript
 * const loader = new SonixAudioTranscriptionLoader({
 *   sonixAuthKey: "SONIX_AUTH_KEY",
 *   request: {
 *     audioFilePath: "LOCAL_AUDIO_FILE_PATH",
 *     fileName: "FILE_NAME",
 *     language: "en",
 *   },
 * });
 * const docs = await loader.load();
 * ```
 */
export class SonixAudioTranscriptionLoader extends BaseDocumentLoader {
  private readonly sonixSpeechRecognitionService: SonixSpeechRecognitionService;

  private readonly speechToTextRequest: SpeechToTextRequest;

  /**
   * @param params.sonixAuthKey Auth key passed to the Sonix service client.
   * @param params.request Speech-to-text request forwarded unchanged to the
   * Sonix service when `load()` is called.
   */
  constructor({
    sonixAuthKey,
    request: speechToTextRequest,
  }: {
    sonixAuthKey: string;
    request: SpeechToTextRequest;
  }) {
    super();
    this.sonixSpeechRecognitionService = new SonixSpeechRecognitionService(
      sonixAuthKey
    );
    this.speechToTextRequest = speechToTextRequest;
  }

  /**
   * Runs the configured speech-to-text request and converts the result
   * into documents.
   *
   * @returns An array holding one `Document` whose `pageContent` is the
   * transcript text and whose metadata carries the request's `fileName`.
   * @throws Error when the service reports a `"failed"` status, so callers
   * can distinguish a failed transcription from an empty result.
   */
  async load(): Promise<Document[]> {
    const { text, status, error } =
      await this.sonixSpeechRecognitionService.speechToText(
        this.speechToTextRequest
      );

    if (status === "failed") {
      // Surface the failure instead of logging it and returning an empty
      // list, which would be indistinguishable from "nothing to load".
      const reason = typeof error === "string" ? error : JSON.stringify(error);
      throw new Error(`Failed to transcribe audio file. Error: ${reason}`);
    }

    const document = new Document({
      pageContent: text,
      metadata: {
        fileName: this.speechToTextRequest.fileName,
      },
    });

    return [document];
  }
}
1 change: 1 addition & 0 deletions langchain/src/load/import_constants.ts
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ export const optionalImportEntrypoints = [
"langchain/document_loaders/web/notiondb",
"langchain/document_loaders/web/notionapi",
"langchain/document_loaders/web/s3",
"langchain/document_loaders/web/sonix_audio_transcription",
"langchain/document_loaders/web/confluence",
"langchain/document_loaders/fs/directory",
"langchain/document_loaders/fs/buffer",
Expand Down
5 changes: 5 additions & 0 deletions langchain/src/load/import_type.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,11 @@ export interface OptionalImportMap {
"langchain/document_loaders/web/s3"?:
| typeof import("../document_loaders/web/s3.js")
| Promise<typeof import("../document_loaders/web/s3.js")>;
"langchain/document_loaders/web/sonix_audio_transcription"?:
| typeof import("../document_loaders/web/sonix_audio_transcription.js")
| Promise<
typeof import("../document_loaders/web/sonix_audio_transcription.js")
>;
"langchain/document_loaders/web/confluence"?:
| typeof import("../document_loaders/web/confluence.js")
| Promise<typeof import("../document_loaders/web/confluence.js")>;
Expand Down
1 change: 1 addition & 0 deletions langchain/tsconfig.json
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,7 @@
"src/document_loaders/web/notiondb.ts",
"src/document_loaders/web/notionapi.ts",
"src/document_loaders/web/s3.ts",
"src/document_loaders/web/sonix_audio_transcription.ts",
"src/document_loaders/web/confluence.ts",
"src/document_loaders/web/sort_xyz_blockchain.ts",
"src/document_loaders/fs/directory.ts",
Expand Down
14 changes: 14 additions & 0 deletions yarn.lock
Original file line number Diff line number Diff line change
Expand Up @@ -19662,6 +19662,7 @@ __metadata:
replicate: ^0.9.0
rimraf: ^5.0.1
rollup: ^3.19.1
sonix-speech-recognition: ^2.1.1
sqlite3: ^5.1.4
srt-parser-2: ^1.2.2
ts-jest: ^29.1.0
Expand Down Expand Up @@ -19723,6 +19724,7 @@ __metadata:
puppeteer: ^19.7.2
redis: ^4.6.4
replicate: ^0.9.0
sonix-speech-recognition: ^2.1.1
srt-parser-2: ^1.2.2
typeorm: ^0.3.12
typesense: ^1.5.3
Expand Down Expand Up @@ -19825,6 +19827,8 @@ __metadata:
optional: true
replicate:
optional: true
sonix-speech-recognition:
optional: true
srt-parser-2:
optional: true
typeorm:
Expand Down Expand Up @@ -25985,6 +25989,16 @@ __metadata:
languageName: node
linkType: hard

"sonix-speech-recognition@npm:^2.1.1":
version: 2.1.1
resolution: "sonix-speech-recognition@npm:2.1.1"
dependencies:
axios: ^1.4.0
form-data: ^4.0.0
checksum: 67ffdf64fc90bab3556fb2a1c327e2d2ee0b3b9492d1f9fe13dd94125f9e1171dce5cc527fa9a08aba3c28d35d5595b16c3e5fca11317321e60720ec233b0d6b
languageName: node
linkType: hard

"sort-css-media-queries@npm:2.1.0":
version: 2.1.0
resolution: "sort-css-media-queries@npm:2.1.0"
Expand Down

0 comments on commit 981d86f

Please sign in to comment.