Spaces:
Runtime error
Runtime error
| import librosa | |
| import gradio as gr | |
| import numpy as np | |
| from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC | |
| import soundfile as sf | |
| import torch | |
# Pretrained Wav2Vec2 checkpoint shared by the processor and the CTC model.
MODEL_NAME = "facebook/wav2vec2-base-960h"

# Instantiate both objects once at module import; speech2text reuses them.
processor = Wav2Vec2Processor.from_pretrained(MODEL_NAME)
model = Wav2Vec2ForCTC.from_pretrained(MODEL_NAME)
def speech2text(audio):
    """Transcribe a recorded audio clip to lowercase text with Wav2Vec2.

    Args:
        audio: A ``(sample_rate, samples)`` pair, or ``None`` when nothing
            was recorded. ``samples`` may be 1-D (mono) or 2-D; for 2-D
            input only the first column is used, as in the original code.
            NOTE(review): presumably 2-D input is (frames, channels) as
            delivered by the Gradio microphone component -- confirm.

    Returns:
        The lowercase transcription, or an empty string when there is no
        audio to transcribe.
    """
    # The UI can submit without a recording; avoid crashing on unpacking.
    if audio is None:
        return ""

    sr, data = audio
    samples = np.asarray(data)
    if samples.size == 0:
        return ""

    # Only index a channel when a channel axis exists; 1-D input would
    # raise IndexError on ``[:, 0]``.
    if samples.ndim > 1:
        samples = samples[:, 0]
    samples = samples.astype(np.float32)

    # Resample to 16 kHz. The rates are passed by keyword because newer
    # librosa releases reject them as positional arguments.
    target_sr = 16000
    data_16khz = librosa.resample(samples, orig_sr=sr, target_sr=target_sr)

    # Convert the waveform to model inputs (batch size 1).
    input_values = processor(
        [data_16khz],
        sampling_rate=target_sr,
        return_tensors="pt",
        padding="longest",
    ).input_values

    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        logits = model(input_values).logits

    # Greedy CTC decoding: take the argmax per frame, then decode to text.
    predicted_ids = torch.argmax(logits, dim=-1)
    transcription = processor.batch_decode(predicted_ids)
    return transcription[0].lower()  # batch size 1
# Wire the transcription function into a simple UI: microphone in, text out.
iface = gr.Interface(fn=speech2text, inputs="microphone", outputs="text")
iface.launch()