Spaces:

jacob1576
/

AudioTextHTDemucs

Running

App Files Files Community

AudioTextHTDemucs / config.yaml

jacob1576

Updated pip requirements and added code to load model from HF hub

cffc5b3 1 day ago

raw

history blame contribute delete

3.73 kB

	data:
	train_dir: /home/jacob/datasets/musdb18/train # Path to train subfolder of MUSDB18 dataset
	test_dir: /home/jacob/datasets/musdb18/test # Path to test subfolder of MUSDB18 dataset
	segment_seconds: 6.0 # Length of audio segments for training [s]
	pct_train: 0.2 # Decimal percentage of full data to use for training (otherwise 1 epoch takes ~15 hrs)
	pct_test: 0.1 # Decimal percentage of full data to use for testing
	overlap: 0.1 # Overlap between segments for chunked inference [s]
	sample_rate: 44100 # Sample rate for audio files [Hz]
	channels: 2 # Number of audio channels (1 = mono, 2 = stereo)
	random_segments: False # Whether to use random segments during training
	augment: True # Whether to use data augmentation (gain adjustment and channel swapping)

	model:
	name: Audio-Text-HTDemucs # Model name
	model_dim: 384 # Model dimension
	text_dim: 512 # Text embedding dimension (laion/clap-htsat-unfused is 512)
	num_heads: 8 # Number of attention heads for text cross-attention layer
	device: cpu # Device to use for training (cuda for GPU or cpu)
	use_amp: False # Whether to use automatic mixed precision (AMP) during training - WORK IN PROGRESS

	training:
	batch_size: 8 # Batch size for training
	num_workers: 0 # Number of DataLoader workers
	num_epochs: 20 # Number of training epochs
	optimizer:
	name: AdamW
	lr: 1e-4 # Learning rate
	weight_decay: 1e-2 # Weight decay for optimizer
	grad_clip: 5.0 # Gradient clipping value (set to null to disable)
	loss_weights:
	sdr: 0.9 # Weight for SDR loss
	sisdr_weight: 0.1 # Weight for SI-SDR loss, total loss is (sdr_weight * sdr) + (sisdr_weight * si_sdr)
	use_L1_comb_loss: False # Whether to use L1 combination loss
	L1_comb_loss:
	sdr_weight: 1.0 # Weight for SDR in L1 combination loss
	l1_weight: 0.1 # Weight for L1 loss in L1 combination loss
	#resume_from: null # Path to checkpoint to resume training from (set to null to train from scratch)
	resume_from: checkpoints/2025_11_30_batch4/best_model.pt

	wandb:
	use_wandb: False # Whether to use Weights & Biases for experiment tracking
	project: audio-text-htdemucs # Wandb project name
	run_name: null
	log_every: 50 # Log to wandb every N batches
	validate_every: 1 # Validate every N epochs
	save_every: 5 # Save model checkpoint every N epochs
	checkpoint_dir: checkpoints/2025_12_06/ # Directory to save model checkpoints
	output_dir: results/2025_12_06 # Directory to save inference results