# NOTE: The original capture of this file included Hugging Face Spaces
# file-viewer chrome (status lines, file size, commit hash c51cd4a, and a
# line-number gutter). That residue is not part of the program and has been
# reduced to this comment so the module parses.
import librosa
import numpy as np
import soundfile as sf
from transformers import pipeline
import gradio as gr
import os
# Speech-emotion-recognition model (HuBERT-large fine-tuned on the SUPERB ER
# task). Loaded once at import time so every request reuses the same pipeline
# instead of re-downloading/re-initializing the model per call.
emotion_model = pipeline("audio-classification", model="superb/hubert-large-superb-er")
def analyze_audio(file):
    """Extract emotional, rhythmic and tonal features from an audio file.

    Parameters
    ----------
    file : str
        Path to an audio file readable by librosa (and by the HF pipeline).

    Returns
    -------
    dict
        Individual feature values plus a weighted composite
        ``audio_emotion_score``.
    """
    # 1) Load the waveform resampled to 16 kHz (the rate the HuBERT emotion
    # model was trained on).
    y, sr = librosa.load(file, sr=16000)

    # 2) RMS energy: mean loudness plus a count of frames that spike to more
    # than twice the mean (rough "energy peak" detector).
    rms = librosa.feature.rms(y=y)[0]
    energy_mean = float(np.mean(rms))
    energy_peaks = int(np.sum(rms > energy_mean * 2.0))

    # 3) Tone variation: standard deviation of the YIN fundamental-frequency
    # track, bounded to the 50-500 Hz speech range.
    pitch = librosa.yin(y, fmin=50, fmax=500)
    pitch_variation = float(np.std(pitch))

    # 4) Rough speaking-pace proxy: beat-tracker tempo in BPM.
    # librosa >= 0.10 returns tempo as a 1-element ndarray while older
    # releases return a scalar; np.atleast_1d normalizes both so float()
    # cannot fail on an array.
    tempo, _ = librosa.beat.beat_track(y=y, sr=sr)
    pace = float(np.atleast_1d(tempo)[0])

    # 5) Pause detection. librosa.effects.split returns the NON-silent
    # intervals, so the number of pauses *between* speech segments is
    # len(intervals) - 1 (clamped to 0 for empty/fully-silent input).
    # The original code counted the speech segments themselves.
    non_silent = librosa.effects.split(y, top_db=30)
    num_pauses = max(0, len(non_silent) - 1)

    # 6) Emotion classification: keep the top label and its confidence.
    emotions = emotion_model(file)
    top_emotion = emotions[0]["label"]
    top_emotion_score = float(emotions[0]["score"])

    # 7) Composite emotional score. NOTE(review): the terms are on very
    # different scales (energy/confidence in ~0-1, pace in BPM, pauses a raw
    # count), so this is a heuristic blend, not a normalized weighted mean.
    final_score = (
        energy_mean * 0.25 +
        pitch_variation * 0.30 +
        pace * 0.10 +
        num_pauses * 0.05 +
        top_emotion_score * 0.30
    )
    return {
        "energy_mean": energy_mean,
        "energy_peaks": energy_peaks,
        "pitch_variation": pitch_variation,
        "pace": pace,
        "num_pauses": num_pauses,
        "emotion": top_emotion,
        "emotion_confidence": top_emotion_score,
        "audio_emotion_score": final_score,
    }
def process_file(audio_file):
    """Gradio callback: run the full feature analysis on the uploaded file."""
    result = analyze_audio(audio_file)
    return result
# Gradio UI: takes an uploaded audio file (passed to the callback as a
# filesystem path) and renders the returned feature dict as JSON.
interface = gr.Interface(
    fn=process_file,
    inputs=gr.Audio(type="filepath"),
    outputs="json",
    title="Audio Emotion Engine",
    description="Extracts emotional, rhythmic and tonal features from audio for viral segment scoring."
)
# Start the web server (blocks until the app is stopped).
interface.launch()