# AudioEngine / app.py
# Hugging Face Space by erydmn (commit c51cd4a, verified)
import librosa
import numpy as np
import soundfile as sf
from transformers import pipeline
import gradio as gr
import os
# Speech emotion-recognition model, loaded once at startup so the pipeline
# weights are not re-downloaded/re-initialized on every request.
# NOTE(review): assumes the checkpoint accepts 16 kHz audio file paths — the
# loader below resamples to 16 kHz, which matches; confirm against the model card.
emotion_model = pipeline("audio-classification", model="superb/hubert-large-superb-er")
def analyze_audio(file):
    """Extract rhythmic, tonal and emotional features from an audio file.

    Parameters
    ----------
    file : str
        Path to an audio file readable by librosa (Gradio passes a temp
        file path).

    Returns
    -------
    dict
        Energy, pitch, pace, pause and emotion features plus a weighted
        composite ``audio_emotion_score``.
    """
    # 1) Load audio, resampled to 16 kHz mono (the rate the emotion model expects).
    y, sr = librosa.load(file, sr=16000)

    # 2) RMS energy: mean loudness plus the number of frames spiking above
    #    twice the mean (a crude "excitement peak" count).
    rms = librosa.feature.rms(y=y)[0]
    energy_mean = float(np.mean(rms))
    energy_peaks = int(np.sum(rms > energy_mean * 2.0))

    # 3) Pitch / tone variation: spread of the YIN f0 track over 50-500 Hz.
    pitch = librosa.yin(y, fmin=50, fmax=500)
    pitch_variation = float(np.std(pitch))

    # 4) Speaking pace — rough tempo proxy in BPM, not literal words/second.
    #    Newer librosa returns tempo as a size-1 ndarray; float() on an array
    #    is deprecated/raises, so unwrap it explicitly.
    tempo, _ = librosa.beat.beat_track(y=y, sr=sr)
    pace = float(np.atleast_1d(tempo)[0])

    # 5) Pause detection. librosa.effects.split returns the NON-silent
    #    intervals; the pauses are the gaps between consecutive intervals,
    #    i.e. len(intervals) - 1 (and 0 for fully silent / single-segment audio).
    #    The original code counted the voiced segments themselves.
    intervals = librosa.effects.split(y, top_db=30)
    num_pauses = max(len(intervals) - 1, 0)

    # 6) Emotion classification via the module-level pipeline; results are
    #    sorted by score, so [0] is the top label.
    emotions = emotion_model(file)
    top_emotion = emotions[0]["label"]
    top_emotion_score = float(emotions[0]["score"])

    # 7) Final composite emotional score.
    # NOTE(review): the terms are on very different scales (pace is BPM
    # ~60-180, the others roughly 0-1), so pace dominates the weighted sum —
    # weights kept as originally authored; confirm intended normalization.
    final_score = (
        energy_mean * 0.25 +
        pitch_variation * 0.30 +
        pace * 0.10 +
        num_pauses * 0.05 +
        top_emotion_score * 0.30
    )

    return {
        "energy_mean": energy_mean,
        "energy_peaks": energy_peaks,
        "pitch_variation": pitch_variation,
        "pace": pace,
        "num_pauses": num_pauses,
        "emotion": top_emotion,
        "emotion_confidence": top_emotion_score,
        "audio_emotion_score": final_score
    }
def process_file(audio_file):
    """Gradio callback: run the full feature analysis on the uploaded clip.

    Parameters
    ----------
    audio_file : str
        Temp-file path supplied by the ``gr.Audio(type="filepath")`` input.

    Returns
    -------
    dict
        The feature dictionary produced by ``analyze_audio``.
    """
    features = analyze_audio(audio_file)
    return features
# Gradio UI wiring: a single audio input (delivered to the callback as a
# temp-file path) mapped to a JSON view of the returned feature dict.
interface = gr.Interface(
fn=process_file,
inputs=gr.Audio(type="filepath"),
outputs="json",
title="Audio Emotion Engine",
description="Extracts emotional, rhythmic and tonal features from audio for viral segment scoring."
)
# Start the web server (blocks until the process is stopped).
interface.launch()