forked from watson-developer-cloud/node-sdk
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsynthesize-stream.ts
242 lines (220 loc) · 8.81 KB
/
synthesize-stream.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
/**
* (C) Copyright IBM Corp. 2018, 2020.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License
*/
import { Agent, RequestOptions } from 'http';
import { Authenticator, qs } from 'ibm-cloud-sdk-core';
import { Readable, ReadableOptions } from 'stream';
import { w3cwebsocket as w3cWebSocket } from 'websocket';
import { SynthesizeWebSocketParams } from '../text-to-speech/v1';
import { extractTransactionId, processUserParameters } from './websocket-utils';
/**
 * pipe()-able Node.js Readable stream - the text to synthesize is supplied in the
 * constructor options, and the binary audio returned by the service is both pushed
 * down the pipe and emitted in 'message' events.
 *
 * Cannot be instantiated directly, instead created by calling #synthesizeUsingWebSocket()
 *
 * Uses WebSockets under the hood.
 * @param {Object} options
 * @constructor
 */
class SynthesizeStream extends Readable {
  /** `name` given to errors reported by the service in a text frame. */
  static WEBSOCKET_ERROR: string = 'WebSocket error';
  /** `name` given to errors raised by the WebSocket connection itself. */
  static WEBSOCKET_CONNECTION_ERROR: string = 'WebSocket connection error';

  private options: SynthesizeStream.Options;
  private authenticator: Authenticator;
  private socket: w3cWebSocket;
  // true once initialize() has created the socket and attached its handlers
  private initialized: boolean;

  /**
   * pipe()-able Node.js Readable stream - accepts text and emits binary audio data in its 'message' events
   *
   * Uses WebSockets under the hood.
   *
   *
   * Note that the WebSocket connection is not established until data is first requested from the stream
   * (the first `_read()` call). This allows for IAM token request management by the SDK.
   *
   * @param {Options} options
   * @param {Authenticator} options.authenticator - Authenticator to add Authorization header
   * @param {string} [options.serviceUrl] - Base url for service (default='wss://api.us-south.text-to-speech.watson.cloud.ibm.com')
   * @param {OutgoingHttpHeaders} [options.headers] - Only works in Node.js, not in browsers. Allows for custom headers to be set, including an Authorization header (preventing the need for auth tokens)
   * @param {boolean} [options.disableSslVerification] - If true, disable SSL verification for the WebSocket connection (default=false)
   * @param {Agent} [options.agent] - custom http(s) agent, useful for using the sdk behind a proxy (Node only)
   * @param {string} options.text - The text that is to be synthesized
   * @param {string} options.accept - The requested format (MIME type) of the audio
   * @param {string[]} [options.timings] - An array that specifies whether the service is to return word timing information for all strings of the input text
   * @param {string} [options.accessToken] - Bearer token to put in query string
   * @param {string} [options.watsonToken] - Valid Watson authentication token (for Cloud Foundry)
   * @param {string} [options.voice] - The voice to use for the synthesis (default='en-US_MichaelVoice')
   * @param {string} [options.customizationId] - The customization ID (GUID) of a custom voice model that is to be used for the synthesis
   * @param {boolean} [options.xWatsonLearningOptOut] - Indicates whether IBM can use data that is sent over the connection to improve the service for future users (default=false)
   * @param {string} [options.xWatsonMetadata] - Associates a customer ID with all data that is passed over the connection. The parameter accepts the argument customer_id={id}, where {id} is a random or generic string that is to be associated with the data
   * @constructor
   */
  constructor(options: SynthesizeStream.Options) {
    super(options);
    this.options = options;
    this.initialized = false;
    this.authenticator = options.authenticator;
  }

  /**
   * Builds an Error carrying the given `name` and the originating WebSocket
   * event/frame in its `event` property, so every error emitted by this stream
   * has the same shape.
   *
   * @param {string} name - value for the error's `name`
   * @param {string} message - human readable error message
   * @param {Object} event - the WebSocket event or frame that caused the error
   * @return {Error}
   */
  private static createError(name: string, message: string, event: unknown): Error {
    const err = new Error(message);
    err.name = name;
    err['event'] = event;
    return err;
  }

  /**
   * Opens the WebSocket connection and wires its events to this stream.
   *
   * Called from `_read()` after authentication has completed; sets `initialized`
   * once the handlers are attached.
   */
  initialize(): void {
    const options = this.options;

    // process query params
    const queryParamsAllowed = [
      'access_token',
      'watson-token',
      'voice',
      'customization_id',
      'x-watson-learning-opt-out',
      'x-watson-metadata',
    ];
    const queryParams = processUserParameters(options, queryParamsAllowed);
    const queryString = qs.stringify(queryParams);

    // synthesize the url; an http(s) base url is rewritten to ws(s)
    const url =
      (options.serviceUrl || 'wss://api.us-south.text-to-speech.watson.cloud.ibm.com')
        .replace(/^http/, 'ws') +
      '/v1/synthesize?' +
      queryString;

    // add custom agent in the request options if given by user
    // default request options to null
    const { agent } = options;
    const requestOptions: RequestOptions = agent ? { agent } : null;

    const socket = (this.socket = new w3cWebSocket(
      url,
      null,
      null,
      options.headers,
      requestOptions,
      { tlsOptions: { rejectUnauthorized: !options.disableSslVerification } }
    ));

    socket.onopen = () => {
      // process the payload params and send them as the opening message
      const payloadParamsAllowed = [
        'text',
        'accept',
        'timings',
      ];
      const payload = processUserParameters(options, payloadParamsAllowed);
      socket.send(JSON.stringify(payload));

      /**
       * emitted once the WebSocket connection has been established
       * @event SynthesizeStream#open
       */
      this.emit('open');
    };

    socket.onmessage = message => {
      const chunk = message.data;

      // some info messages are sent as strings, telling the content_type and
      // timings. Emit them as separate events, but do not send them along the
      // pipe.
      if (typeof chunk === 'string') {
        try {
          let json;
          try {
            json = JSON.parse(chunk);
          } catch (parseError) {
            // report an unparseable frame through the stream instead of letting
            // the exception escape from the socket callback
            const detail = parseError instanceof Error ? parseError.message : String(parseError);
            this.emit(
              'error',
              SynthesizeStream.createError(
                SynthesizeStream.WEBSOCKET_ERROR,
                'Unable to parse text message from service as JSON: ' + detail,
                message
              )
            );
            return;
          }

          // only object payloads can carry the keys checked below
          if (json !== null && typeof json === 'object') {
            if (json['binary_streams']) {
              this.emit('binary_streams', message, json);
            } else if (json['marks']) {
              this.emit('marks', message, json);
            } else if (json['words']) {
              this.emit('words', message, json);
            } else if (json['error']) {
              // this should have same structure as onerror emit
              this.emit(
                'error',
                SynthesizeStream.createError(SynthesizeStream.WEBSOCKET_ERROR, json['error'], message)
              );
            } else if (json['warnings']) {
              this.emit('warnings', message, json);
            }
          }
        } finally {
          // the raw text frame is always surfaced, whatever happened above
          this.emit('message', message, chunk);
        }
        return;
      }

      /**
       * Emit any messages received over the wire, mainly used for debugging.
       *
       * @event SynthesizeStream#message
       * @param {Object} message - frame object received from service
       * @param {Object} data - a data attribute of the frame that's a Buffer/TypedArray
       */
      const data = Buffer.from(chunk);
      this.emit('message', message, data);
      this.push(data);
    };

    socket.onerror = event => {
      this.emit(
        'error',
        SynthesizeStream.createError(
          SynthesizeStream.WEBSOCKET_CONNECTION_ERROR,
          'WebSocket connection error',
          event
        )
      );
      // signal end-of-stream to consumers
      this.push(null);
    };

    socket.onclose = event => {
      // signal end-of-stream to consumers
      this.push(null);

      /**
       * @event SynthesizeStream#close
       * @param {Number} reasonCode
       * @param {String} description
       */
      this.emit('close', event.code, event.reason);
    };

    this.initialized = true;
  }

  /**
   * Readable implementation hook.
   *
   * Audio is pushed as it arrives on the socket rather than on demand, so this
   * hook is only used to authenticate and open the connection the first time
   * data is requested. Once the socket exists there is nothing left to do.
   */
  _read(): void {
    if (this.initialized) {
      return;
    }

    // even though we aren't controlling the read from websocket,
    // we can take advantage of the fact that _read is async and hack
    // this function to retrieve a token if the service is using IAM auth
    this.authenticator
      .authenticate(this.options)
      .then(() => {
        if (!this.initialized) {
          this.initialize();
        }
      })
      // covers both a failed authentication and a synchronous failure while
      // opening the socket in initialize()
      .catch(err => {
        this.emit('error', err);
        this.push(null);
      });
  }

  /**
   * Returns a Promise that resolves with Watson Transaction ID from the X-Transaction-ID header
   *
   * Works in Node.js but not in browsers (the W3C WebSocket API does not expose headers)
   *
   * @return Promise<String>
   */
  getTransactionId(): Promise<string> {
    return extractTransactionId(this);
  }
}
// Namespace merged with the SynthesizeStream class so that the options type is
// reachable as `SynthesizeStream.Options`.
namespace SynthesizeStream {
// these options represent the superset of the base params,
// query params, and opening message params, with the keys
// in lowerCamelCase format so we can expose a consistent style
// to the user. this object should be updated any time either
// payloadParamsAllowed or queryParamsAllowed is changed
export interface Options extends ReadableOptions, SynthesizeWebSocketParams {
/* base options */
// authenticator whose authenticate() is called before the socket is opened
authenticator: Authenticator;
// base url of the service; the stream falls back to
// 'wss://api.us-south.text-to-speech.watson.cloud.ibm.com' when omitted
serviceUrl?: string;
// when true, the socket is created with tlsOptions.rejectUnauthorized = false
disableSslVerification?: boolean;
// custom http(s) agent, passed to the socket as part of its request options
agent?: Agent;
}
}
// export assignment: the class (with its merged namespace) is the module's export
export = SynthesizeStream;