🎉 MiniMax-M2.7: Peak Performance. Ultimate Value. Master the Complex. ➔ Try Now.
{
"event": "task_start",
"model": "speech-2.8-turbo",
"language_boost": "Chinese",
"voice_setting": {
"voice_id": "English_expressive_narrator",
"speed": 1,
"vol": 1,
"pitch": 0
},
"pronunciation_dict": {
"tone": [
"Omg/Oh my god"
]
},
"audio_setting": {
"sample_rate": 32000,
"bitrate": 128000,
"format": "mp3",
"channel": 1
}
}{
"event": "task_continue",
"text": "Omg(sighs), the real danger is not that computers start thinking like people, but that people start thinking like computers. Computers can only help us with simple tasks."
}{
"event": "task_finish"
}{
"session_id": "xxxx",
"event": "connected_success",
"trace_id": "0303a2882bf18235ae7a809ae0f3cca7",
"base_resp": {
"status_code": 0,
"status_msg": "success"
}
}{
"session_id": "xxxx",
"event": "task_started",
"trace_id": "0303a2882bf18235ae7a809ae0f3cca7",
"base_resp": {
"status_code": 0,
"status_msg": "success"
}
}{
"data": {
"audio": "xxx"
},
"extra_info": {
"audio_channel": 1,
"audio_format": "mp3",
"audio_length": 9914,
"audio_sample_rate": 32000,
"audio_size": 157869,
"bitrate": 128000,
"invisible_character_ratio": 0,
"usage_characters": 158,
"word_count": 158
},
"is_final": true,
"session_id": "301871346491491",
"trace_id": "04ee3794e2c9e4a6d5f99e77742f06fd",
"base_resp": {
"status_code": 0,
"status_msg": "success"
}
}{
"session_id": "xxxx",
"event": "task_finished",
"trace_id": "0303a2882bf18235ae7a809ae0f3cca7",
"base_resp": {
"status_code": 0,
"status_msg": "success"
}
}{
"session_id": "xxxx",
"event": "task_failed",
"trace_id": "0303a2882bf18235ae7a809ae0f3cca7",
"base_resp": {
"status_code": 1004,
"status_msg": "XXXXXXX"
}
}
Use this API for synchronous text-to-audio (T2A) synthesis over WebSocket.
This example streams and plays the returned audio in real time while also saving the complete audio file. Note: To enable real-time audio playback, you must first install the mpv player. Additionally, make sure to set your API key in the environment variable MINIMAX_API_KEY.
Documentation Index
Fetch the complete documentation index at: https://platform.minimax.io/docs/llms.txt
Use this file to discover all available pages before exploring further.
import asyncio
import websockets
import json
import ssl
import subprocess
import os
# Speech model name; sent as the "model" field of the task_start request.
model = "speech-2.8-hd"
# Audio format; sent as the audio_setting "format" field of the task_start
# request and reused as the file extension of the saved output file.
file_format = "mp3"
class StreamAudioPlayer:
    """Feed streamed audio into an ``mpv`` subprocess for real-time playback.

    Audio chunks are received as hex strings, decoded to bytes and written to
    the standard input of mpv, which is started with ``fd://0`` as its source.
    """

    def __init__(self):
        # Handle of the mpv subprocess; stays None until start_mpv() succeeds.
        self.mpv_process = None

    def start_mpv(self):
        """Start MPV player process.

        Returns:
            True when the process was launched, False when mpv is not
            installed or could not be started.
        """
        mpv_command = ["mpv", "--no-cache", "--no-terminal", "--", "fd://0"]
        try:
            self.mpv_process = subprocess.Popen(
                mpv_command,
                stdin=subprocess.PIPE,
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
            )
        except FileNotFoundError:
            print("Error: mpv not found. Please install mpv")
            return False
        except Exception as e:
            print(f"Failed to start mpv: {e}")
            return False
        print("MPV player started")
        return True

    def play_audio_chunk(self, hex_audio):
        """Play audio chunk.

        Args:
            hex_audio: Audio data encoded as a hexadecimal string.

        Returns:
            True when the decoded bytes were written to the player, False
            when no player is running or decoding/writing failed.
        """
        process = self.mpv_process
        if not (process and process.stdin):
            return False
        try:
            process.stdin.write(bytes.fromhex(hex_audio))
            # Flush right away so playback is not delayed by pipe buffering.
            process.stdin.flush()
        except Exception as e:
            print(f"Play failed: {e}")
            return False
        return True

    def stop(self):
        """Stop player.

        Closes the player's stdin so it sees end-of-stream, then waits for it
        to exit. A player that does not exit in time is terminated (and, as a
        last resort, killed) and then reaped so no zombie process is left.
        """
        process = self.mpv_process
        if not process:
            return

        stdin = process.stdin
        if stdin and not stdin.closed:
            try:
                stdin.close()
            except OSError:
                # The player already exited (broken pipe); there is nothing
                # left to flush, so carry on and reap the process below.
                pass

        try:
            process.wait(timeout=20)
        except subprocess.TimeoutExpired:
            process.terminate()
            try:
                # Reap the terminated process instead of leaving it behind.
                process.wait(timeout=5)
            except subprocess.TimeoutExpired:
                process.kill()
                process.wait()
async def establish_connection(api_key):
    """Establish WebSocket connection.

    Opens a TLS WebSocket to the T2A endpoint, authenticating with a Bearer
    token, and waits for the server's ``connected_success`` event.

    Args:
        api_key: API key placed in the ``Authorization`` header.

    Returns:
        The open WebSocket connection, or None when the connection could not
        be opened or the server did not confirm it.
    """
    url = "wss://api.minimax.io/ws/v1/t2a_v2"
    headers = {"Authorization": f"Bearer {api_key}"}
    # Keep the default context as-is: it verifies the server certificate and
    # host name, which protects the API key sent in the header from
    # man-in-the-middle interception.
    ssl_context = ssl.create_default_context()

    ws = None
    try:
        ws = await websockets.connect(url, additional_headers=headers, ssl=ssl_context)
        connected = json.loads(await ws.recv())
        if connected.get("event") == "connected_success":
            print("Connection successful")
            return ws
    except Exception as e:
        print(f"Connection failed: {e}")

    # The handshake did not succeed; release the socket if one was opened so
    # it is not leaked.
    if ws is not None:
        try:
            await ws.close()
        except Exception as e:
            print(f"Connection failed: {e}")
    return None
async def start_task(websocket):
    """Send task start request.

    Sends the ``task_start`` event with the voice and audio settings and
    reads one reply from the server.

    Args:
        websocket: Open WebSocket connection to the T2A service.

    Returns:
        True when the server replied with a ``task_started`` event,
        False otherwise.
    """
    start_msg = {
        "event": "task_start",
        "model": model,
        "voice_setting": {
            "voice_id": "male-qn-qingse",
            "speed": 1,
            "vol": 1,
            "pitch": 0,
            "english_normalization": False,
        },
        "audio_setting": {
            "sample_rate": 32000,
            "bitrate": 128000,
            "format": file_format,
            "channel": 1,
        },
    }
    await websocket.send(json.dumps(start_msg))
    response = json.loads(await websocket.recv())
    return response.get("event") == "task_started"
async def continue_task_with_stream_play(websocket, text, player):
    """Send continue request and stream play audio.

    Sends a ``task_continue`` event carrying ``text``, then reads server
    messages until one is marked ``is_final``. Every audio chunk is handed to
    ``player`` for playback and also collected; on the final message the
    collected audio is written to ``output.<file_format>``.

    Args:
        websocket: Open WebSocket connection with a started task.
        text: Text to synthesize.
        player: Player exposing ``play_audio_chunk(hex_audio)`` and an
            ``mpv_process`` attribute.

    Returns:
        Number of seconds the caller should wait for playback to finish:
        the estimated playback duration plus 5, but at least 10. Returns 10
        when the stream ended early because of an error or a ``task_failed``
        event.
    """
    await websocket.send(json.dumps({
        "event": "task_continue",
        "text": text
    }))

    chunk_count = 0
    played_bytes = 0
    # Collect chunks in a list and join once at the end; repeated bytes
    # concatenation copies the whole buffer on every chunk.
    audio_chunks = []

    while True:
        try:
            response = json.loads(await websocket.recv())

            if response.get("event") == "task_failed":
                # A failed task produces no further audio; stop reading so
                # the caller can close the connection.
                base_resp = response.get("base_resp") or {}
                print(
                    f"Error: task failed "
                    f"({base_resp.get('status_code')}: {base_resp.get('status_msg')})"
                )
                break

            audio = (response.get("data") or {}).get("audio")
            if audio:
                chunk_count += 1
                print(f"Playing chunk #{chunk_count}")
                audio_bytes = bytes.fromhex(audio)
                # Keep every chunk for the saved file, even if playback of
                # this chunk fails, so the file holds the complete audio.
                audio_chunks.append(audio_bytes)
                if player.play_audio_chunk(audio):
                    played_bytes += len(audio_bytes)

            if response.get("is_final"):
                print(f"Audio synthesis completed: {chunk_count} chunks")
                # Closing stdin signals end-of-stream to the player.
                if player.mpv_process and player.mpv_process.stdin:
                    player.mpv_process.stdin.close()

                # Save audio to file
                with open(f"output.{file_format}", "wb") as f:
                    f.write(b"".join(audio_chunks))
                print(f"Audio saved as output.{file_format}")

                # 0.0625 ms per byte is 8 bits / 128000 bit/s, i.e. this
                # estimate assumes the 128000 bitrate requested in task_start.
                estimated_duration = played_bytes * 0.0625 / 1000
                return max(estimated_duration + 5, 10)
        except Exception as e:
            print(f"Error: {e}")
            break

    return 10
async def close_connection(websocket):
    """Close connection.

    Sends the ``task_finish`` event and then closes the WebSocket. This is a
    best-effort shutdown: failures are reported but never raised, and the
    socket is closed even when sending ``task_finish`` fails.

    Args:
        websocket: The WebSocket connection to close; a falsy value is
            ignored.
    """
    if not websocket:
        return

    try:
        await websocket.send(json.dumps({"event": "task_finish"}))
    except Exception as e:
        print(f"Failed to send task_finish: {e}")

    # Always attempt the close, even if the send above failed.
    try:
        await websocket.close()
    except Exception as e:
        print(f"Failed to close connection: {e}")
async def main():
    """Run the demo: synthesize a fixed text, play it live and save it.

    Reads the API key from the ``MINIMAX_API_KEY`` environment variable,
    starts the player, opens the connection, starts the task, streams the
    audio, waits for playback to finish, and finally stops the player and
    closes the connection.
    """
    api_key = os.getenv("MINIMAX_API_KEY")
    if not api_key:
        # Without a key the request would be sent as "Bearer None".
        print("Error: MINIMAX_API_KEY environment variable is not set")
        return

    text = "The real danger is not that computers start thinking like people(sighs), but that people start thinking like computers. Computers can only help us with simple tasks."
    player = StreamAudioPlayer()
    # Defined before the try block so the cleanup below can always refer to it.
    ws = None
    try:
        if not player.start_mpv():
            return
        ws = await establish_connection(api_key)
        if not ws:
            return
        if not await start_task(ws):
            print("Task startup failed")
            return
        wait_time = await continue_task_with_stream_play(ws, text, player)
        # Give the player time to finish playing the buffered audio.
        await asyncio.sleep(wait_time)
    except Exception as e:
        print(f"Error: {e}")
    finally:
        player.stop()
        await close_connection(ws)
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    asyncio.run(main())
{
"event": "task_start",
"model": "speech-2.8-turbo",
"language_boost": "Chinese",
"voice_setting": {
"voice_id": "English_expressive_narrator",
"speed": 1,
"vol": 1,
"pitch": 0
},
"pronunciation_dict": {
"tone": [
"Omg/Oh my god"
]
},
"audio_setting": {
"sample_rate": 32000,
"bitrate": 128000,
"format": "mp3",
"channel": 1
}
}{
"event": "task_continue",
"text": "Omg(sighs), the real danger is not that computers start thinking like people, but that people start thinking like computers. Computers can only help us with simple tasks."
}{
"event": "task_finish"
}{
"session_id": "xxxx",
"event": "connected_success",
"trace_id": "0303a2882bf18235ae7a809ae0f3cca7",
"base_resp": {
"status_code": 0,
"status_msg": "success"
}
}{
"session_id": "xxxx",
"event": "task_started",
"trace_id": "0303a2882bf18235ae7a809ae0f3cca7",
"base_resp": {
"status_code": 0,
"status_msg": "success"
}
}{
"data": {
"audio": "xxx"
},
"extra_info": {
"audio_channel": 1,
"audio_format": "mp3",
"audio_length": 9914,
"audio_sample_rate": 32000,
"audio_size": 157869,
"bitrate": 128000,
"invisible_character_ratio": 0,
"usage_characters": 158,
"word_count": 158
},
"is_final": true,
"session_id": "301871346491491",
"trace_id": "04ee3794e2c9e4a6d5f99e77742f06fd",
"base_resp": {
"status_code": 0,
"status_msg": "success"
}
}{
"session_id": "xxxx",
"event": "task_finished",
"trace_id": "0303a2882bf18235ae7a809ae0f3cca7",
"base_resp": {
"status_code": 0,
"status_msg": "success"
}
}{
"session_id": "xxxx",
"event": "task_failed",
"trace_id": "0303a2882bf18235ae7a809ae0f3cca7",
"base_resp": {
"status_code": 1004,
"status_msg": "XXXXXXX"
}
}
Sending the "task_start" event officially begins the speech synthesis task. The task is considered successfully started when the server returns a "task_started" event. Only after receiving this event can you send "task_continue" or "task_finish" events to the server.
After receiving the "task_started" event from the server, the task officially begins. You can send "task_continue" events to provide text for synthesis. Multiple "task_continue" events can be sent sequentially. If no new event is sent within 120 seconds after receiving the last result, the WebSocket connection will automatically close.
When the server receives the task_finish event, it waits for all tasks in the current queue to complete, then closes the WebSocket connection and ends the session.
Notification that T2A task has started
Notification that T2A task has started
Notification that T2A task is continuing
Notification that T2A task has completed successfully
If the task_failed event is received, it indicates that the task has failed. In this case, the WebSocket connection must be closed, and the error should be handled.