/**
 * Configuration for chunking audio with server-side voice activity
 * detection (VAD).
 */
interface VadConfig {
    /** Literal tag; must be `"server_vad"` to enable manual chunking using server-side VAD. */
    type: "server_vad";

    /**
     * Sensitivity threshold for voice activity detection, from 0.0 to 1.0.
     * A higher value requires louder audio to activate the model.
     */
    threshold?: number;

    /**
     * Duration of silence, in milliseconds, used to detect that speech has
     * stopped. Shorter values make the model respond sooner, but it may jump
     * in on short pauses from the user.
     */
    silence_duration_ms?: number;

    /** Amount of audio, in milliseconds, to include before the speech detected by the VAD. */
    prefix_padding_ms?: number;
}

Properties

prefix_padding_ms?: number

Amount of audio (in milliseconds) to include before the speech detected by the VAD.

silence_duration_ms?: number

Duration of silence (in milliseconds) used to detect that speech has stopped. With shorter values the model will respond more quickly, but it may jump in on short pauses from the user.

threshold?: number

Sensitivity threshold (0.0 to 1.0) for voice activity detection. A higher threshold will require louder audio to activate the model, and thus might perform better in noisy environments.

type: "server_vad"

Must be set to `server_vad` to enable manual chunking using server-side VAD.