# File: voice-model-rl-training/configs/fast_experiment.yaml
# Last commit: c3efd49 "Initial deployment" (mbellan)
# Fast experimentation configuration
# Quickly test different reward functions and hyperparameters
# Model selection and RL action-space settings.
model:
  # NOTE(review): looks like a Hugging Face Hub model ID — confirm with the loader.
  name: "microsoft/wavlm-base-plus"
  enable_rl: true
  action_dim: 256
  action_representation: "discrete"
# Training-loop settings: device, run length, checkpointing, logging, seed.
training:
  device: "cpu"  # Change to "cuda" if you have GPU
  num_episodes: 20  # Moderate number for quick experiments
  batch_size: 16  # Larger batch = faster training per episode
  episode_length: 10
  # NOTE(review): presumably counted in episodes (20 episodes / interval 10) — confirm.
  checkpoint_interval: 10
  checkpoint_dir: "training_runs/fast/checkpoints"
  max_checkpoints: 5
  log_interval: 1
  random_seed: 42
# Dataset location, audio sample rate, and train/val/test partitioning.
data:
  raw_data_dir: "data/raw"
  sample_rate: 16000
  # Split fractions sum to 1.0 (0.7 + 0.15 + 0.15).
  train_split: 0.7
  val_split: 0.15
  test_split: 0.15
# RL algorithm choice and its hyperparameters.
algorithm:
  name: "ppo"
  learning_rate: 0.0003  # Higher LR for faster learning
  gamma: 0.95  # Lower gamma = focus on immediate rewards
  gae_lambda: 0.95
  clip_epsilon: 0.2
  value_loss_coef: 0.5
  entropy_coef: 0.02  # More exploration
  max_grad_norm: 1.0
# Reward function composition.
reward:
  # Component weights; they sum to 1.0 (0.5 + 0.25 + 0.25).
  weights:
    clarity: 0.5  # Strong emphasis on clarity
    naturalness: 0.25
    accuracy: 0.25
  # NOTE(review): use_asr / asr_model are placed as siblings of `weights`
  # (they are not weights themselves) — confirm against the config loader.
  use_asr: true
  asr_model: "facebook/wav2vec2-base-960h"
# Output locations for logs and visualizations, plus save cadence.
monitoring:
  log_dir: "training_runs/fast/logs"
  visualization_dir: "training_runs/fast/visualizations"
  # NOTE(review): unit is not shown here (presumably episodes) — confirm.
  save_frequency: 5