Examples for NodeJS, Python, PHP, C, and Java
Formats

Note that we are using PCM-encoded audio for our tests, and for the moment we only support the 8000, 16000, 32000, and 48000 Hz sample rates through our live transcription API. We support everything through our regular audio-transcription API.
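If you are not sure whether your audio matches one of those rates, you can check it before opening the live connection. Below is a minimal sketch using Python's standard wave module; the file path is a placeholder, and any resampling to a supported rate is assumed to happen elsewhere (for example with ffmpeg).

import wave

SUPPORTED_RATES = {8000, 16000, 32000, 48000}  # rates accepted by the live transcription API

def check_sample_rate(path):
    # Read the header of a PCM WAV file and return its sample rate
    with wave.open(path, 'rb') as wav:
        rate = wav.getframerate()
    if rate in SUPPORTED_RATES:
        print(f"{path}: {rate} Hz is supported by the live API")
    else:
        print(f"{path}: {rate} Hz is NOT supported; resample to one of {sorted(SUPPORTED_RATES)}")
    return rate

# Example usage with a placeholder file name:
# check_sample_rate('your-file.wav')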
🔎 Code Snippets
Here are examples of how you can connect to the Gladia audio transcription WebSocket:
// Node.js example (using the 'ws' package)
const WebSocket = require('ws');
const fs = require('fs');

// Replace YOUR_API_TOKEN with the API token provided by the Gladia team
const API_TOKEN = 'YOUR_API_TOKEN';

// Create a new WebSocket connection
const ws = new WebSocket('wss://api.gladia.io/audio/text/audio-transcription');
const file = fs.readFileSync('your-file.wav');

ws.on('open', function open() {
  console.log('WebSocket connected!');

  // Send audio frames to the API for transcription
  const sampleRate = 48000; // replace with your sample rate; only 8000, 16000, 32000 and 48000 are accepted
  console.log('Sending...');

  // Encode your audio data as base64
  const base64Frames = Buffer.from(file).toString('base64');

  // Create the message object with the required fields
  const message = {
    x_gladia_key: API_TOKEN,
    sample_rate: sampleRate,
    frames: base64Frames
  };

  // Send the message as a string to the WebSocket server
  ws.send(JSON.stringify(message));
});

ws.on('message', function incoming(data) {
  console.log(data.toString());
});

ws.on('close', function close() {
  console.log('WebSocket disconnected!');
});

ws.on('error', function err(error) {
  console.log('WebSocket error:');
  console.log(error);
});
# Python example (using the websocket-client package)
import base64
import json
import websocket

# Replace with your own API token
x_gladia_key = 'YOUR_API_TOKEN'
# Replace with the sample rate of your audio; only 8000, 16000, 32000 and 48000 are accepted
sample_rate = 16000
# Replace with the path to your audio file
audio_file = 'path/to/your/audio/file.wav'

# Open the audio file and read the data
with open(audio_file, 'rb') as f:
    audio_data = f.read()

# Encode the audio data as base64 (decoded to a string so it can be serialized as JSON)
encoded_audio_data = base64.b64encode(audio_data).decode('utf-8')

# Define the WebSocket endpoint URL
endpoint_url = 'wss://api.gladia.io/audio/text/audio-transcription'

# Define the WebSocket message
message = {
    'x_gladia_key': x_gladia_key,
    'sample_rate': sample_rate,
    'frames': encoded_audio_data
}

# Send the message once the connection is open
def on_open(ws):
    ws.send(json.dumps(message))

# Print every response received from the API
def on_message(ws, msg):
    print(msg)

# Create the WebSocket connection and run it
ws = websocket.WebSocketApp(endpoint_url, on_open=on_open, on_message=on_message)
ws.run_forever()
<?php
// PHP example (using ratchet/pawl)
require __DIR__ . '/vendor/autoload.php';

use Ratchet\Client\WebSocket;
use Ratchet\Client\Connector;
use React\EventLoop\Factory as LoopFactory;

// Read the audio file and encode it as base64
$audio_file = '/path/to/audio.wav';
$audio_data = file_get_contents($audio_file);
$audio_base64 = base64_encode($audio_data);

// Set up the WebSocket connection
$loop = LoopFactory::create();
$connector = new Connector($loop);

$connector('wss://api.gladia.io/audio/text/audio-transcription')
    ->then(function (WebSocket $conn) use ($audio_base64) {
        // Send audio frames to the API for transcription
        $x_gladia_key = 'YOUR_API_TOKEN';
        $sample_rate = 48000; // only 8000, 16000, 32000 and 48000 are accepted
        $conn->send(json_encode([
            'x_gladia_key' => $x_gladia_key,
            'sample_rate' => $sample_rate,
            'frames' => $audio_base64,
        ]));

        // Listen for API responses
        $conn->on('message', function ($msg) {
            echo "Received response from Gladia API: {$msg}\n";
        });
    }, function ($e) {
        echo "Could not connect to Gladia API: {$e->getMessage()}\n";
    });

$loop->run();
/* C example (libwebsockets for the WebSocket client, OpenSSL for base64, json-c for the payload) */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <openssl/bio.h>
#include <openssl/evp.h>
#include <openssl/buffer.h>
#include <json-c/json.h>
#include <libwebsockets.h>

typedef struct user_data {
    unsigned char *json_data;
} user_data_t;

int message_sent = 0;
unsigned char *read_file(const char *filename, size_t *size) {
    FILE *file = fopen(filename, "rb");
    if (!file) {
        fprintf(stderr, "Error opening file: %s\n", filename);
        return NULL;
    }
    fseek(file, 0, SEEK_END);
    long file_size = ftell(file);
    fseek(file, 0, SEEK_SET);
    unsigned char *buffer = (unsigned char *)malloc(file_size);
    if (!buffer) {
        fclose(file);
        fprintf(stderr, "Error allocating memory for file buffer\n");
        return NULL;
    }
    size_t bytes_read = fread(buffer, 1, file_size, file);
    fclose(file);
    if (bytes_read != (size_t)file_size) {
        free(buffer);
        fprintf(stderr, "Error reading file: %s\n", filename);
        return NULL;
    }
    *size = file_size;
    return buffer;
}
// WebSocket client protocol and callback
static int callback_audio_transcription(struct lws *wsi, enum lws_callback_reasons reason, void *user, void *in, size_t len) {
    switch (reason) {
    case LWS_CALLBACK_CLIENT_ESTABLISHED:
        lwsl_user("WebSocket connection established\n");
        lws_callback_on_writable(wsi);
        break;
    case LWS_CALLBACK_CLIENT_RECEIVE:
        lwsl_user("Received data: %.*s\n", (int)len, (const char *)in);
        break;
    case LWS_CALLBACK_CLIENT_CONNECTION_ERROR:
        lwsl_err("WebSocket connection error: %s\n", in ? (char *)in : "(null)");
        break;
    case LWS_CALLBACK_CLOSED:
        lwsl_user("WebSocket connection closed\n");
        break;
    case LWS_CALLBACK_CLIENT_WRITEABLE: {
        if (!message_sent) {
            message_sent = 1;
            user_data_t *user_data = (user_data_t *)user;
            if (user_data && user_data->json_data) {
                size_t msg_len = strlen((const char *)user_data->json_data);
                /* lws_write() requires LWS_PRE bytes of headroom before the payload */
                unsigned char *buf = (unsigned char *)malloc(LWS_PRE + msg_len);
                if (!buf)
                    return -1;
                memcpy(buf + LWS_PRE, user_data->json_data, msg_len);
                int n = lws_write(wsi, buf + LWS_PRE, msg_len, LWS_WRITE_TEXT);
                free(buf);
                if (n != (int)msg_len) {
                    lwsl_err("ERROR %d writing to ws socket\n", n);
                    return -1;
                }
                lwsl_user("%s: written %d bytes\n", __func__, n);
            }
        }
        break;
    }
    default:
        break;
    }
    return 0;
}

static const struct lws_protocols protocols[] = {
    {"audio-transcription", callback_audio_transcription, sizeof(char *), 0},
    {NULL, NULL, 0, 0} /* terminator */
};
int main() {
    size_t file_size;
    unsigned char *file_data = read_file("yourfile.wav", &file_size);
    if (!file_data) {
        return -1;
    }
    int len = file_size;

    unsigned char *out1 = NULL;
    unsigned char *out2 = NULL;
    unsigned char *ptr = NULL;
    BIO *bmem, *b64;
    BUF_MEM *bptr;

    /* Base64-encode the audio data with OpenSSL */
    b64 = BIO_new(BIO_f_base64());
    bmem = BIO_new(BIO_s_mem());
    b64 = BIO_push(b64, bmem);
    BIO_write(b64, file_data, len);
    BIO_flush(b64);
    BIO_get_mem_ptr(b64, &bptr);
    BIO_set_close(b64, BIO_NOCLOSE);

    /* Strip the newlines inserted by the base64 BIO */
    out1 = (unsigned char *)malloc(2 * bptr->length + 100);
    ptr = out1;
    for (int n = 0; n < (int)bptr->length; n++) {
        if (bptr->data[n] != '\n')
            *ptr++ = bptr->data[n];
    }
    *ptr = 0;
    out2 = (unsigned char *)malloc(2 * bptr->length + 100);
    BIO_free_all(b64);

    /* Build the JSON message with json-c */
    {
        struct json_object *jobj;
        jobj = json_object_new_object();
        json_object_object_add(jobj, "x_gladia_key", json_object_new_string("your_token"));
        json_object_object_add(jobj, "sample_rate", json_object_new_int(48000));
        json_object_object_add(jobj, "frames", json_object_new_string((const char *)out1));
        strcpy((char *)out2, json_object_to_json_string_ext(jobj, JSON_C_TO_STRING_PLAIN));
        printf("Message: %s\n", out2);
        json_object_put(jobj); // Delete the json object
    }

    struct lws_context_creation_info info;
    struct lws_client_connect_info i;
    struct lws_context *context;
    struct lws *wsi;

    memset(&info, 0, sizeof(info));
    memset(&i, 0, sizeof(i));
    lws_set_log_level(LLL_USER | LLL_ERR | LLL_WARN | LLL_NOTICE, NULL);

    info.port = CONTEXT_PORT_NO_LISTEN;
    info.protocols = protocols;
    info.options = LWS_SERVER_OPTION_DO_SSL_GLOBAL_INIT; /* needed for the wss (TLS) connection */
    info.gid = -1;
    info.uid = -1;

    context = lws_create_context(&info);
    if (!context) {
        lwsl_err("Creating WebSocket context failed\n");
        return -1;
    }

    user_data_t *user_data = (user_data_t *)malloc(sizeof(user_data_t));
    user_data->json_data = out2;

    i.context = context;
    i.address = "api.gladia.io";
    i.port = 443;
    i.ssl_connection = LCCSCF_USE_SSL;
    i.path = "/audio/text/audio-transcription";
    i.host = i.address;
    i.origin = i.address;
    i.protocol = protocols[0].name;
    i.userdata = user_data; // Pass the user data struct to the lws_client_connect_info

    wsi = lws_client_connect_via_info(&i);
    if (!wsi) {
        lwsl_err("WebSocket connection failed\n");
        return -1;
    }
    lws_callback_on_writable(wsi);

    while (lws_service(context, 100) >= 0);

    lws_context_destroy(context);
    free(out1);
    free(out2);
    free(user_data); // Free the user_data_t struct
    free(file_data);
    return 0;
}
// Java example (using Java-WebSocket and Gson)
package com.example;

import java.io.File;
import java.net.URI;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.Base64;
import java.util.HashMap;
import java.util.Map;

import org.java_websocket.client.WebSocketClient;
import org.java_websocket.handshake.ServerHandshake;
import com.google.gson.Gson;

public class AudioTranscriptionClient extends WebSocketClient {
    private static final String X_GLADIA_KEY = "YOUR_API_TOKEN";
    private static final int SAMPLE_RATE = 16000;
    private static Gson gson = new Gson();

    public AudioTranscriptionClient(URI serverUri) {
        super(serverUri);
    }

    public static void main(String[] args) throws Exception {
        String audioFilePath = "path/to/your/audio/file.wav";
        File audioFile = new File(audioFilePath);
        if (!audioFile.exists()) {
            System.err.println("File not found: " + audioFilePath);
            return;
        }

        byte[] audioData = Files.readAllBytes(Paths.get(audioFilePath));
        String encodedAudioData = Base64.getEncoder().encodeToString(audioData);

        Map<String, Object> message = new HashMap<>();
        message.put("x_gladia_key", X_GLADIA_KEY);
        message.put("sample_rate", SAMPLE_RATE);
        message.put("frames", encodedAudioData);

        /* print the message */
        System.out.println(gson.toJson(message));

        AudioTranscriptionClient client = new AudioTranscriptionClient(new URI("wss://api.gladia.io/audio/text/audio-transcription"));
        client.connectBlocking();
        client.send(gson.toJson(message));

        while (!client.isClosed()) {
            Thread.sleep(500);
        }
    }

    @Override
    public void onOpen(ServerHandshake handshake) {
        System.out.println("Connected to server.");
    }

    @Override
    public void onMessage(String message) {
        System.out.println("Received message: " + message);
    }

    @Override
    public void onClose(int code, String reason, boolean remote) {
        System.out.println("Connection closed. Reason: " + reason);
    }

    @Override
    public void onError(Exception ex) {
        System.err.println("Error occurred: " + ex.getMessage());
    }
}
// build.gradle for the Java example
plugins {
    id 'java'
    id 'application'
}

repositories {
    mavenCentral()
}

dependencies {
    implementation 'com.google.code.gson:gson:2.8.9'
    implementation 'org.java-websocket:Java-WebSocket:1.4.1'
}

mainClassName = 'com.example.AudioTranscriptionClient'

jar {
    manifest {
        attributes 'Main-Class': 'com.example.AudioTranscriptionClient'
    }
}

tasks.withType(JavaCompile) {
    options.encoding = 'UTF-8'
}

tasks.withType(JavaExec) {
    standardInput = System.in
}
You need to replace YOUR_API_TOKEN with the actual API token provided by the Gladia team. When the WebSocket connection is established, the code encodes the audio frames in base64, creates a message object with the required fields, and sends it as a string to the WebSocket server. The server responds with the transcription of the audio frames.
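For live use cases you will typically want to send the audio in smaller chunks as it becomes available rather than as one large message. The sketch below is a hypothetical chunked variation of the Python example above; it assumes the endpoint accepts repeated messages that each carry a new batch of base64-encoded frames alongside the same key and sample rate, and the chunk size and file path are placeholders. Check the API reference before relying on this pattern.

# Hypothetical chunked-streaming variation (websocket-client).
# ASSUMPTION: the endpoint accepts repeated frame messages on one connection.
import base64
import json
import websocket

X_GLADIA_KEY = 'YOUR_API_TOKEN'
SAMPLE_RATE = 16000            # must be 8000, 16000, 32000 or 48000
CHUNK_SIZE = SAMPLE_RATE * 2   # roughly one second of 16-bit mono PCM per message

def on_open(ws):
    # Read the audio in chunks and send each one as its own message
    with open('path/to/your/audio/file.wav', 'rb') as f:
        while True:
            chunk = f.read(CHUNK_SIZE)
            if not chunk:
                break
            ws.send(json.dumps({
                'x_gladia_key': X_GLADIA_KEY,
                'sample_rate': SAMPLE_RATE,
                'frames': base64.b64encode(chunk).decode('utf-8'),
            }))

def on_message(ws, msg):
    # Print each transcription response as it arrives
    print(msg)

ws = websocket.WebSocketApp(
    'wss://api.gladia.io/audio/text/audio-transcription',
    on_open=on_open,
    on_message=on_message,
)
ws.run_forever()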