import os
import subprocess
import sys

import gradio as gr
# Target compute device. NOTE(review): not referenced by any code visible in
# this file -- confirm whether it is still needed.
device = "cuda"

# One-time environment bootstrap, executed at import time. Commands run in
# order through the shell; a failing command is not fatal (os.system only
# returns the exit status), which matches the original best-effort behaviour.
_SETUP_COMMANDS = (
    # Fetch the Wav2Lip sources (inference.py, face_detection/, ...).
    'git clone https://github.com/Rudrabha/Wav2Lip.git',
    'pip3 install --upgrade pip',
    # Download the s3fd.pth weights into the location inside the Wav2Lip
    # checkout given by the target path.
    'curl -o ./Wav2Lip/face_detection/detection/sfd/s3fd.pth https://www.adrianbulat.com/downloads/python-fan/s3fd-619a316812.pth',
    'pip3 install moviepy',
    # `-y` is required: without it pip asks for interactive confirmation,
    # which blocks (or aborts) when the app starts unattended.
    'pip3 uninstall -y numpy',
    'pip3 install --upgrade numpy',
    'pip3 install speechRecognition',
    'pip3 install gtts',
    'pip3 install googletrans==3.1.0a0',
    'pip3 install numba==0.48',
    'pip3 install transformers',
)
for _command in _SETUP_COMMANDS:
    os.system(_command)

# Static text shown by the Gradio interface.
title = "Automatic translation and dubbing for Indic Languages"
description = "A demo application to dub and translate videos spoken in Tamil, Hindi, Bengali and Telugu"
article = "Official Repo: https://github.com/Rudrabha/Wav2Lip"
def inference(language, speed, voice, video):
    """Dub the English speech in ``video`` into ``language`` and lip-sync it.

    Pipeline: extract the audio track, transcribe it with Wav2Vec2, translate
    the transcript with googletrans, synthesise the translation with gTTS and
    finally run the Wav2Lip ``inference.py`` script on the original video and
    the synthesised audio.

    Args:
        language: Target language, one of "Hindi", "Tamil", "Bengali" or
            "Telugu".
        speed: "Slow" selects gTTS's slow speech mode; any other value
            (including ``None``) uses normal speed.
        voice: Accepted so the signature matches the UI inputs; it is not
            used by this function.
        video: Filesystem path of the video to translate.

    Returns:
        The string "./results/result_voice.mp4" (presumably Wav2Lip's default
        output location -- confirm against the Wav2Lip script).

    Raises:
        ValueError: If no video is given, the language is not supported, or
            the video has no audio track.
        subprocess.CalledProcessError: If the Wav2Lip script exits with a
            non-zero status.
    """
    # Language code shared by googletrans (dest=) and gTTS (lang=).
    lang_codes = {"Hindi": "hi", "Tamil": "ta", "Bengali": "bn", "Telugu": "te"}

    # Fail fast on bad input, before any package install or model download.
    if not video:
        raise ValueError("No video was provided.")
    if language not in lang_codes:
        raise ValueError(
            "Unsupported language {!r}; choose one of: {}".format(
                language, ", ".join(lang_codes)
            )
        )
    lang_code = lang_codes[language]
    slow_speech = speed == "Slow"

    try:
        from moviepy.editor import VideoFileClip
    except ImportError:
        # moviepy >= 2.0 removed the `moviepy.editor` module.
        from moviepy import VideoFileClip

    # Extract the audio track; always release the reader, even on failure.
    clip = VideoFileClip(video)
    try:
        if clip.audio is None:
            raise ValueError("The uploaded video has no audio track to translate.")
        clip.audio.write_audiofile(r"audio.wav")
    finally:
        clip.close()

    # Runtime installs kept as in the original recipe: the imports below rely
    # on them and the transformers version is pinned here.
    os.system('pip3 install pydub')
    os.system('pip3 install transformers==4.11.3 soundfile sentencepiece torchaudio librosa')

    from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC
    import torch
    import librosa

    processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-large-960h-lv60-self")
    model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-large-960h-lv60-self")

    def get_transcription(audio_path):
        """Return the lower-cased transcript of the audio file at audio_path."""
        # librosa resamples to 16 kHz while loading, so no further resampling
        # step is needed before handing the samples to the processor.
        speech, _ = librosa.load(audio_path, sr=16000)
        input_values = processor(speech, return_tensors="pt", sampling_rate=16000)["input_values"]
        # Inference only: skip autograd bookkeeping.
        with torch.no_grad():
            logits = model(input_values)["logits"]
        predicted_ids = torch.argmax(logits, dim=-1)
        transcription = processor.decode(predicted_ids[0])
        return transcription.lower()

    text = get_transcription("audio.wav")

    from googletrans import Translator
    from gtts import gTTS

    translation = Translator().translate(text, src='en', dest=lang_code)
    # The speed choice belongs to the speech synthesiser, not the translator.
    tts = gTTS(translation.text, lang=lang_code, slow=slow_speech)
    # NOTE(review): gTTS is documented as producing MP3 data; the ".wav" name
    # is kept unchanged here -- confirm Wav2Lip reads it correctly.
    tts.save('input_audio.wav')
    audio = "input_audio.wav"

    # Move the Wav2Lip sources next to this script so inference.py can be run
    # from the current directory (fails harmlessly once already moved).
    os.system('mv ./Wav2Lip/* .')
    # Argument list instead of a shell string: the uploaded path may contain
    # spaces or shell metacharacters. check=True surfaces Wav2Lip failures
    # instead of returning a stale or missing result file.
    subprocess.run(
        [
            "python", "inference.py",
            "--checkpoint_path", "./wav2lip_gan.pth",
            "--face", video,
            "--audio", audio,
        ],
        check=True,
    )
    return "./results/result_voice.mp4"
# Input widgets, listed in the positional order of inference()'s parameters:
# language, speed, voice, video.
language_choice = gr.Radio(
    ["Tamil", "Hindi", "Bengali", "Telugu"],
    label="Enter language to translate to",
)
speed_choice = gr.Radio(
    ["Slow", "Fast"],
    label="Enter speaking speed",
)
voice_choice = gr.Radio(
    ["Male", "Female"],
    label="Enter preferred voice",
)
video_upload = gr.Video(
    format="mp4",
    sources="upload",
    label="Video to be Translated",
)

# Build the web UI around `inference` and start serving it.
iface = gr.Interface(
    inference,
    inputs=[language_choice, speed_choice, voice_choice, video_upload],
    outputs=["video"],
    title=title,
    description=description,
    article=article,
)
# allowed_paths=["."] is passed so Gradio may serve files from the current
# working directory; inference() returns a path under ./results.
iface.launch(allowed_paths=["."])