# speech-to-text / app.py
# rmysmo's picture
# added 4 files
# 5bf3f7c verified
import gradio as gr
from vosk import Model, KaldiRecognizer
import wave
import json
# Load the Vosk model once at import time so it is shared by every
# recognition call instead of being reloaded per request.
MODEL_PATH = "vosk-model-small-uz-0.22"
model = Model(MODEL_PATH)
def recognize_from_file(audio_file):
    """Transcribe a mono 16-bit PCM WAV file with the module-level Vosk model.

    Args:
        audio_file: Path of the audio file on disk. A falsy value (e.g.
            ``None`` when the UI submits without an upload) is rejected
            with a message instead of crashing.

    Returns:
        The recognized text with segments separated by single spaces, or a
        human-readable error message when no file was given or the file is
        not an uncompressed mono 16-bit WAV.
    """
    if not audio_file:
        return "No audio file provided."

    try:
        wf = wave.open(audio_file, "rb")
    except (wave.Error, EOFError):
        # Not a RIFF/WAVE container (or truncated): report it the same way
        # as any other unsupported format rather than raising to the UI.
        return "Audio file must be WAV format mono PCM."

    # The context manager guarantees the file handle is closed on every
    # path, including the early format-rejection return below.
    with wf:
        if (
            wf.getnchannels() != 1
            or wf.getsampwidth() != 2
            or wf.getcomptype() != "NONE"
        ):
            return "Audio file must be WAV format mono PCM."

        recognizer = KaldiRecognizer(model, wf.getframerate())
        segments = []
        while True:
            data = wf.readframes(4000)
            if not data:
                break
            # AcceptWaveform returns truthy when an utterance is complete
            # and a finished result is available.
            if recognizer.AcceptWaveform(data):
                result = json.loads(recognizer.Result())
                segments.append(result.get("text", ""))

        # Flush whatever audio is still buffered in the recognizer.
        final_result = json.loads(recognizer.FinalResult())
        segments.append(final_result.get("text", ""))

    # Skip empty segments so the output has no doubled or trailing spaces.
    return " ".join(segment for segment in segments if segment)
# Web UI: one audio upload in, plain recognized text out.
# type="filepath" makes Gradio hand the handler a path on disk rather than
# decoded samples, which is what recognize_from_file opens with `wave`.
audio_input = gr.Audio(type="filepath")

iface = gr.Interface(
    fn=recognize_from_file,
    inputs=audio_input,
    outputs="text",
    title="Speech Recognition from Audio File",
    description="Upload a WAV file for recognition.",
)

# Start the Gradio server as soon as the module is executed.
iface.launch()