-
Notifications
You must be signed in to change notification settings - Fork 4
/
types.go
346 lines (291 loc) · 10.1 KB
/
types.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
package openairt
import (
"encoding/json"
)
// Voice is the name of a voice the model can use to respond.
type Voice string

// Voice values declared by this package.
const (
	VoiceAlloy   Voice = "alloy"
	VoiceAsh     Voice = "ash"
	VoiceBallad  Voice = "ballad"
	VoiceCoral   Voice = "coral"
	VoiceEcho    Voice = "echo"
	VoiceSage    Voice = "sage"
	VoiceShimmer Voice = "shimmer"
	VoiceVerse   Voice = "verse"
)
// AudioFormat names an audio encoding. ServerSession uses it for both the
// input and the output audio format.
type AudioFormat string

// AudioFormat values declared by this package.
const (
	AudioFormatPcm16    AudioFormat = "pcm16"
	AudioFormatG711Ulaw AudioFormat = "g711_ulaw"
	AudioFormatG711Alaw AudioFormat = "g711_alaw"
)
// Modality is a kind of output the model can respond with.
type Modality string

// Modality values declared by this package.
const (
	ModalityText  Modality = "text"
	ModalityAudio Modality = "audio"
)
// ClientTurnDetectionType is the turn detection type used in
// ClientTurnDetection.
type ClientTurnDetectionType string

// ClientTurnDetectionType values declared by this package.
const (
	ClientTurnDetectionTypeServerVad ClientTurnDetectionType = "server_vad"
)
// ServerTurnDetectionType is the turn detection type used in
// ServerTurnDetection.
type ServerTurnDetectionType string

// ServerTurnDetectionType values declared by this package.
const (
	ServerTurnDetectionTypeNone      ServerTurnDetectionType = "none"
	ServerTurnDetectionTypeServerVad ServerTurnDetectionType = "server_vad"
)
// TurnDetectionType names a turn detection mode.
type TurnDetectionType string

// TurnDetectionType values declared by this package.
const (
	// TurnDetectionTypeNone means turn detection is disabled.
	// It can only be used in ServerSession, not in ClientSession.
	// To disable turn detection, send a SessionUpdateEvent with
	// TurnDetection set to nil.
	TurnDetectionTypeNone TurnDetectionType = "none"
	// TurnDetectionTypeServerVad uses server-side VAD to detect turns.
	// This is the default value for a newly created session.
	TurnDetectionTypeServerVad TurnDetectionType = "server_vad"
)
// TurnDetectionParams holds the VAD tuning parameters. It is embedded in both
// ClientTurnDetection and ServerTurnDetection. Every field is tagged
// omitempty, so zero values are left out when encoding.
type TurnDetectionParams struct {
	// Threshold is the activation threshold for VAD.
	Threshold float64 `json:"threshold,omitempty"`
	// PrefixPaddingMs is the audio included before speech starts, in
	// milliseconds.
	PrefixPaddingMs int `json:"prefix_padding_ms,omitempty"`
	// SilenceDurationMs is the duration of silence used to detect that
	// speech stopped, in milliseconds.
	SilenceDurationMs int `json:"silence_duration_ms,omitempty"`
}
// ClientTurnDetection is the client-side turn detection configuration: a
// type plus the embedded VAD parameters.
type ClientTurnDetection struct {
	// Type is the type of turn detection; only "server_vad" is currently
	// supported.
	Type ClientTurnDetectionType `json:"type"`
	TurnDetectionParams
}
// ServerTurnDetection is the turn detection configuration carried by
// ServerSession: a type plus the embedded VAD parameters.
type ServerTurnDetection struct {
	// Type is the type of turn detection ("server_vad" or "none").
	Type ServerTurnDetectionType `json:"type"`
	TurnDetectionParams
}
// ToolType is the kind of a tool. It is used by Tool and ToolChoice.
type ToolType string

// ToolType values declared by this package.
const (
	ToolTypeFunction ToolType = "function"
)
// ToolChoiceInterface is implemented by the tool choice representations in
// this file: ToolChoiceString and ToolChoice.
type ToolChoiceInterface interface {
	// ToolChoice is a marker method; the implementations in this file have
	// empty bodies.
	ToolChoice()
}
// ToolChoiceString is the string form of a tool choice.
type ToolChoiceString string

// ToolChoice marks ToolChoiceString as a ToolChoiceInterface implementation.
func (ToolChoiceString) ToolChoice() {}

// ToolChoiceString values declared by this package.
const (
	ToolChoiceAuto     ToolChoiceString = "auto"
	ToolChoiceNone     ToolChoiceString = "none"
	ToolChoiceRequired ToolChoiceString = "required"
)
// ToolChoice is the object form of a tool choice: a tool type plus the
// function it refers to.
//
// Note: encoding/json never treats a struct value as empty, so the omitempty
// option on Function does not suppress the field when encoding.
type ToolChoice struct {
	Type     ToolType     `json:"type"`
	Function ToolFunction `json:"function,omitempty"`
}

// ToolChoice marks ToolChoice as a ToolChoiceInterface implementation.
func (ToolChoice) ToolChoice() {}
// ToolFunction identifies a function by name inside a ToolChoice.
type ToolFunction struct {
	// Name is the function name.
	Name string `json:"name"`
}
// MessageRole is the role associated with a message item.
type MessageRole string

// MessageRole values declared by this package.
const (
	MessageRoleSystem    MessageRole = "system"
	MessageRoleAssistant MessageRole = "assistant"
	MessageRoleUser      MessageRole = "user"
)
// InputAudioTranscription is the configuration for input audio
// transcription.
type InputAudioTranscription struct {
	// Model is the model used for transcription.
	Model string `json:"model"`
}
// Tool describes a tool made available to the model. Parameters is typed any
// and is encoded as whatever value the caller supplies.
type Tool struct {
	Type        ToolType `json:"type"`
	Name        string   `json:"name"`
	Description string   `json:"description"`
	Parameters  any      `json:"parameters"`
}
// MessageItemType is the type of a MessageItem.
type MessageItemType string

// MessageItemType values declared by this package.
const (
	MessageItemTypeMessage            MessageItemType = "message"
	MessageItemTypeFunctionCall       MessageItemType = "function_call"
	MessageItemTypeFunctionCallOutput MessageItemType = "function_call_output"
)
// MessageContentType is the type of a MessageContentPart.
type MessageContentType string

// MessageContentType values declared by this package.
const (
	MessageContentTypeText       MessageContentType = "text"
	MessageContentTypeAudio      MessageContentType = "audio"
	MessageContentTypeTranscript MessageContentType = "transcript"
	MessageContentTypeInputText  MessageContentType = "input_text"
	MessageContentTypeInputAudio MessageContentType = "input_audio"
)
// MessageContentPart is one element of a MessageItem's content. Which of the
// optional fields applies depends on Type; unset optional fields are omitted
// when encoding.
type MessageContentPart struct {
	// Type is the content type.
	Type MessageContentType `json:"type"`
	// Text is the text content; it applies when the type is text.
	Text string `json:"text,omitempty"`
	// Audio is base64-encoded audio data; it applies when the type is audio.
	Audio string `json:"audio,omitempty"`
	// Transcript is the transcript of the audio; it applies when the type is
	// transcript.
	Transcript string `json:"transcript,omitempty"`
}
// MessageItem is a conversation item. Depending on Type it represents a
// message, a function call, or a function call output; fields that do not
// apply are left empty and omitted when encoding.
type MessageItem struct {
	// ID is the unique ID of the item.
	ID string `json:"id,omitempty"`
	// Type is the type of the item ("message", "function_call",
	// "function_call_output").
	Type MessageItemType `json:"type"`
	// Status is the final status of the item.
	Status ItemStatus `json:"status,omitempty"`
	// Role is the role associated with the item.
	Role MessageRole `json:"role,omitempty"`
	// Content is the content of the item.
	Content []MessageContentPart `json:"content,omitempty"`
	// CallID is the ID of the function call, if the item is a function call.
	CallID string `json:"call_id,omitempty"`
	// Name is the name of the function, if the item is a function call.
	Name string `json:"name,omitempty"`
	// Arguments holds the arguments of the function, if the item is a
	// function call.
	Arguments string `json:"arguments,omitempty"`
	// Output is the output of the function, if the item is a function call
	// output.
	Output string `json:"output,omitempty"`
}
// ResponseMessageItem is a MessageItem together with its object type. It is
// the element type of Response.Output.
type ResponseMessageItem struct {
	MessageItem
	// Object is the object type; it must be "realtime.item".
	Object string `json:"object,omitempty"`
}
// Error describes an error reported by the server. All fields are optional
// and omitted when empty.
type Error struct {
	// Message is a human-readable error message.
	Message string `json:"message,omitempty"`
	// Type is the type of error (e.g., "invalid_request_error",
	// "server_error").
	Type string `json:"type,omitempty"`
	// Code is the error code, if any.
	Code string `json:"code,omitempty"`
	// Param is the parameter related to the error, if any.
	Param string `json:"param,omitempty"`
	// EventID is the event_id of the client event that caused the error, if
	// applicable.
	EventID string `json:"event_id,omitempty"`
}
// ServerToolChoice holds a tool choice received from the server, which may
// arrive either as a string or as a function object. The two fields carry no
// JSON tags; decoding goes through the type's UnmarshalJSON method.
type ServerToolChoice struct {
	String   ToolChoiceString
	Function ToolChoice
}
// UnmarshalJSON implements json.Unmarshaler for ServerToolChoice.
//
// Two JSON shapes are accepted:
//   - a JSON string, stored in String (Function is reset to its zero value);
//   - a JSON object, decoded into Function (String is reset to "").
//
// A JSON null leaves the receiver untouched, following the json.Unmarshaler
// convention. Any other input, including empty or malformed data, returns
// the decoding error and leaves the receiver unmodified.
func (m *ServerToolChoice) UnmarshalJSON(data []byte) error {
	if string(data) == "null" {
		return nil
	}

	// Try the string form first. Decoding through encoding/json (rather than
	// trimming the quotes by hand) resolves escape sequences and cannot index
	// out of range on empty input.
	var s string
	if err := json.Unmarshal(data, &s); err == nil {
		m.String = ToolChoiceString(s)
		m.Function = ToolChoice{}
		return nil
	}

	// Decode into a local value so a failed decode never leaves the receiver
	// half-populated.
	var fn ToolChoice
	if err := json.Unmarshal(data, &fn); err != nil {
		return err
	}
	m.String = ""
	m.Function = fn
	return nil
}
// IsFunction reports whether the tool choice is a function call, i.e.
// whether Function.Type equals ToolTypeFunction.
//
// It uses a value receiver, matching Get, so it can also be called on
// non-addressable ServerToolChoice values; calls through a pointer keep
// working unchanged.
func (m ServerToolChoice) IsFunction() bool {
	return m.Function.Type == ToolTypeFunction
}
// Get returns the tool choice as a ToolChoiceInterface: the Function value
// when IsFunction reports true, and the String value otherwise.
func (m ServerToolChoice) Get() ToolChoiceInterface {
	if !m.IsFunction() {
		return m.String
	}
	return m.Function
}
// ServerSession is the session object as reported by the server.
type ServerSession struct {
	// ID is the unique ID of the session.
	ID string `json:"id"`
	// Object is the object type; it must be "realtime.session".
	Object string `json:"object"`
	// Model is the default model used for this session.
	Model string `json:"model"`
	// Modalities is the set of modalities the model can respond with.
	Modalities []Modality `json:"modalities,omitempty"`
	// Instructions holds the default system instructions.
	Instructions string `json:"instructions,omitempty"`
	// Voice is the voice the model uses to respond (see the Voice
	// constants).
	Voice Voice `json:"voice,omitempty"`
	// InputAudioFormat is the format of input audio.
	InputAudioFormat AudioFormat `json:"input_audio_format,omitempty"`
	// OutputAudioFormat is the format of output audio.
	OutputAudioFormat AudioFormat `json:"output_audio_format,omitempty"`
	// InputAudioTranscription is the configuration for input audio
	// transcription.
	InputAudioTranscription *InputAudioTranscription `json:"input_audio_transcription,omitempty"`
	// TurnDetection is the configuration for turn detection.
	TurnDetection *ServerTurnDetection `json:"turn_detection,omitempty"`
	// Tools lists the tools (functions) available to the model.
	Tools []Tool `json:"tools,omitempty"`
	// ToolChoice describes how the model chooses tools.
	ToolChoice ServerToolChoice `json:"tool_choice,omitempty"`
	// Temperature is the sampling temperature.
	Temperature *float32 `json:"temperature,omitempty"`
	// MaxOutputTokens is the maximum number of output tokens.
	MaxOutputTokens IntOrInf `json:"max_response_output_tokens,omitempty"`
}
// ItemStatus is the status of a MessageItem.
type ItemStatus string

// ItemStatus values declared by this package.
const (
	ItemStatusInProgress ItemStatus = "in_progress"
	ItemStatusCompleted  ItemStatus = "completed"
	ItemStatusIncomplete ItemStatus = "incomplete"
)
// Conversation identifies a conversation.
type Conversation struct {
	// ID is the unique ID of the conversation.
	ID string `json:"id"`
	// Object is the object type; it must be "realtime.conversation".
	Object string `json:"object"`
}
// ResponseStatus is the status of a Response.
type ResponseStatus string

// ResponseStatus values declared by this package.
const (
	ResponseStatusInProgress ResponseStatus = "in_progress"
	ResponseStatusCompleted  ResponseStatus = "completed"
	ResponseStatusCancelled  ResponseStatus = "cancelled"
	ResponseStatusIncomplete ResponseStatus = "incomplete"
	ResponseStatusFailed     ResponseStatus = "failed"
)
// CachedTokensDetails splits a cached token count into text and audio
// tokens. It is nested in InputTokenDetails.
type CachedTokensDetails struct {
	TextTokens  int `json:"text_tokens"`
	AudioTokens int `json:"audio_tokens"`
}
// InputTokenDetails breaks down the input tokens of a Usage value into
// cached, text and audio counts, plus a further breakdown of the cached
// tokens.
type InputTokenDetails struct {
	CachedTokens        int                 `json:"cached_tokens"`
	TextTokens          int                 `json:"text_tokens"`
	AudioTokens         int                 `json:"audio_tokens"`
	CachedTokensDetails CachedTokensDetails `json:"cached_tokens_details,omitempty"`
}
// OutputTokenDetails breaks down the output tokens of a Usage value into
// text and audio counts.
type OutputTokenDetails struct {
	TextTokens  int `json:"text_tokens"`
	AudioTokens int `json:"audio_tokens"`
}
// Usage holds the token usage statistics attached to a Response.
type Usage struct {
	TotalTokens  int `json:"total_tokens"`
	InputTokens  int `json:"input_tokens"`
	OutputTokens int `json:"output_tokens"`
	// InputTokenDetails holds the input token details.
	InputTokenDetails InputTokenDetails `json:"input_token_details,omitempty"`
	// OutputTokenDetails holds the output token details.
	OutputTokenDetails OutputTokenDetails `json:"output_token_details,omitempty"`
}
// Response is a response object together with its status, generated output
// items and usage statistics.
type Response struct {
	// ID is the unique ID of the response.
	ID string `json:"id"`
	// Object is the object type; it must be "realtime.response".
	Object string `json:"object"`
	// Status is the status of the response.
	Status ResponseStatus `json:"status"`
	// StatusDetails carries additional details about the status.
	StatusDetails any `json:"status_details,omitempty"`
	// Output is the list of output items generated by the response.
	Output []ResponseMessageItem `json:"output"`
	// Usage holds the usage statistics for the response.
	Usage *Usage `json:"usage,omitempty"`
}
// RateLimit describes the state of a single rate limit.
type RateLimit struct {
	// Name is the name of the rate limit ("requests", "tokens",
	// "input_tokens", "output_tokens").
	Name string `json:"name"`
	// Limit is the maximum allowed value for the rate limit.
	Limit int `json:"limit"`
	// Remaining is the remaining value before the limit is reached.
	Remaining int `json:"remaining"`
	// ResetSeconds is the number of seconds until the rate limit resets.
	ResetSeconds float64 `json:"reset_seconds"`
}