forked from sashabaranov/go-openai
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathspeech.go
87 lines (73 loc) · 2.21 KB
/
speech.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
package openai
import (
"context"
"errors"
"net/http"
)
// SpeechModel is the string identifier carried in the "model" field of a
// CreateSpeechRequest.
type SpeechModel string

// Speech model identifiers recognized by isValidSpeechModel.
const (
	TTSModel1      SpeechModel = "tts-1"
	TTSModel1HD    SpeechModel = "tts-1-hd"
	TTSModelCanary SpeechModel = "canary-tts"
)
// SpeechVoice is the string identifier carried in the "voice" field of a
// CreateSpeechRequest.
type SpeechVoice string

// Voice identifiers recognized by isValidVoice.
const (
	VoiceAlloy   SpeechVoice = "alloy"
	VoiceEcho    SpeechVoice = "echo"
	VoiceFable   SpeechVoice = "fable"
	VoiceOnyx    SpeechVoice = "onyx"
	VoiceNova    SpeechVoice = "nova"
	VoiceShimmer SpeechVoice = "shimmer"
)
// SpeechResponseFormat is the string identifier carried in the
// "response_format" field of a CreateSpeechRequest.
type SpeechResponseFormat string

// Response format identifiers that can be placed in
// CreateSpeechRequest.ResponseFormat.
const (
	SpeechResponseFormatMp3  SpeechResponseFormat = "mp3"
	SpeechResponseFormatOpus SpeechResponseFormat = "opus"
	SpeechResponseFormatAac  SpeechResponseFormat = "aac"
	SpeechResponseFormatFlac SpeechResponseFormat = "flac"
	SpeechResponseFormatWav  SpeechResponseFormat = "wav"
	SpeechResponseFormatPcm  SpeechResponseFormat = "pcm"
)
// ErrInvalidSpeechModel is returned by CreateSpeech when the request's Model
// is not one of the recognized speech models.
var ErrInvalidSpeechModel = errors.New("invalid speech model")

// ErrInvalidVoice is returned by CreateSpeech when the request's Voice is not
// one of the recognized voices.
var ErrInvalidVoice = errors.New("invalid voice")
// CreateSpeechRequest is the request payload passed to Client.CreateSpeech.
type CreateSpeechRequest struct {
	// Model is checked by CreateSpeech against the recognized speech models
	// and is also used when building the request URL.
	Model SpeechModel `json:"model"`

	// Input is serialized as the "input" field.
	// NOTE(review): presumably the text to be synthesized — confirm against the API docs.
	Input string `json:"input"`

	// Voice is checked by CreateSpeech against the recognized voices.
	Voice SpeechVoice `json:"voice"`

	// ResponseFormat is optional and is omitted from the JSON when empty;
	// the default is mp3.
	ResponseFormat SpeechResponseFormat `json:"response_format,omitempty"`

	// Speed is optional and is omitted from the JSON when zero; the default
	// is 1.0.
	Speed float64 `json:"speed,omitempty"`
}
// contains reports whether target is equal (by ==) to at least one element of
// items. A nil or empty slice never contains anything.
func contains[T comparable](items []T, target T) bool {
	for i := 0; i < len(items); i++ {
		if items[i] == target {
			return true
		}
	}
	return false
}
// isValidSpeechModel reports whether model is one of the speech models this
// package recognizes: TTSModel1, TTSModel1HD or TTSModelCanary.
func isValidSpeechModel(model SpeechModel) bool {
	recognized := []SpeechModel{
		TTSModel1,
		TTSModel1HD,
		TTSModelCanary,
	}
	return contains(recognized, model)
}
// isValidVoice reports whether voice is one of the voices this package
// recognizes: alloy, echo, fable, onyx, nova or shimmer.
func isValidVoice(voice SpeechVoice) bool {
	recognized := []SpeechVoice{
		VoiceAlloy,
		VoiceEcho,
		VoiceFable,
		VoiceOnyx,
		VoiceNova,
		VoiceShimmer,
	}
	return contains(recognized, voice)
}
// CreateSpeech validates request and, if it passes, issues a POST to the
// "/audio/speech" endpoint with request as the body and a content type of
// "application/json".
//
// It returns ErrInvalidSpeechModel when request.Model is not a recognized
// model and ErrInvalidVoice when request.Voice is not a recognized voice; the
// model is checked first and no request is built in either case. An error from
// building the request is returned as-is. Otherwise the result is whatever
// sendRequestRaw returns.
func (c *Client) CreateSpeech(ctx context.Context, request CreateSpeechRequest) (response RawResponse, err error) {
	// Reject unrecognized values locally before touching the network.
	switch {
	case !isValidSpeechModel(request.Model):
		return response, ErrInvalidSpeechModel
	case !isValidVoice(request.Voice):
		return response, ErrInvalidVoice
	}

	// The model name is handed to fullURL along with the endpoint suffix.
	endpoint := c.fullURL("/audio/speech", string(request.Model))

	req, err := c.newRequest(
		ctx,
		http.MethodPost,
		endpoint,
		withBody(request),
		withContentType("application/json"),
	)
	if err != nil {
		return response, err
	}

	return c.sendRequestRaw(req)
}