Upload 6 files

Files changed (6) hide show

audio_vae/config.json CHANGED Viewed

@@ -1,19 +1,20 @@
 {
   "_class_name": "AutoencoderKLLTX2Audio",
-  "_diffusers_version": "0.37.0.dev0",
   "attn_resolutions": null,
-  "base_channels": 16,
   "causality_axis": "height",
   "ch_mult": [
     1,
-    2
   ],
   "double_z": true,
   "dropout": 0.0,
   "in_channels": 2,
   "is_causal": true,
-  "latent_channels": 4,
-  "mel_bins": 16,
   "mel_hop_length": 160,
   "mid_block_add_attention": false,
   "norm_type": "pixel",

 {
   "_class_name": "AutoencoderKLLTX2Audio",
+  "_diffusers_version": "0.38.0.dev0",
   "attn_resolutions": null,
+  "base_channels": 128,
   "causality_axis": "height",
   "ch_mult": [
     1,
+    2,
+    4
   ],
   "double_z": true,
   "dropout": 0.0,
   "in_channels": 2,
   "is_causal": true,
+  "latent_channels": 8,
+  "mel_bins": 64,
   "mel_hop_length": 160,
   "mid_block_add_attention": false,
   "norm_type": "pixel",

audio_vae/diffusion_pytorch_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c49400e9ca1e90614af15569e829d361b42eaa1f8fc676dc9db4581be6462088
-size 321628

 version https://git-lfs.github.com/spec/v1
+oid sha256:615ce5acd612c830617d3197d009254bd6c8356e6917aa002a6f96ed01fbbb17
+size 163442552

transformer/config.json CHANGED Viewed

@@ -1,16 +1,18 @@
 {
   "_class_name": "LTX2VideoTransformer3DModel",
-  "_diffusers_version": "0.37.0.dev0",
   "activation_fn": "gelu-approximate",
   "attention_bias": true,
   "attention_head_dim": 32,
   "attention_out_bias": true,
   "audio_attention_head_dim": 32,
   "audio_cross_attention_dim": 64,
   "audio_hop_length": 160,
-  "audio_in_channels": 16,
   "audio_num_attention_heads": 2,
-  "audio_out_channels": 16,
   "audio_patch_size": 1,
   "audio_patch_size_t": 1,
   "audio_pos_embed_max_pos": 20,
@@ -21,7 +23,9 @@
   "caption_channels": 64,
   "causal_offset": 1,
   "cross_attention_dim": 64,
   "cross_attn_timestep_scale_multiplier": 1000,
   "in_channels": 4,
   "norm_elementwise_affine": false,
   "norm_eps": 1e-06,
@@ -30,12 +34,14 @@
   "out_channels": 4,
   "patch_size": 1,
   "patch_size_t": 1,
   "pos_embed_max_pos": 20,
   "qk_norm": "rms_norm_across_heads",
   "rope_double_precision": true,
   "rope_theta": 10000.0,
   "rope_type": "interleaved",
   "timestep_scale_multiplier": 1000,
   "vae_scale_factors": [
     8,
     32,

 {
   "_class_name": "LTX2VideoTransformer3DModel",
+  "_diffusers_version": "0.38.0.dev0",
   "activation_fn": "gelu-approximate",
   "attention_bias": true,
   "attention_head_dim": 32,
   "attention_out_bias": true,
   "audio_attention_head_dim": 32,
   "audio_cross_attention_dim": 64,
+  "audio_cross_attn_mod": false,
+  "audio_gated_attn": false,
   "audio_hop_length": 160,
+  "audio_in_channels": 128,
   "audio_num_attention_heads": 2,
+  "audio_out_channels": 128,
   "audio_patch_size": 1,
   "audio_patch_size_t": 1,
   "audio_pos_embed_max_pos": 20,
   "caption_channels": 64,
   "causal_offset": 1,
   "cross_attention_dim": 64,
+  "cross_attn_mod": false,
   "cross_attn_timestep_scale_multiplier": 1000,
+  "gated_attn": false,
   "in_channels": 4,
   "norm_elementwise_affine": false,
   "norm_eps": 1e-06,
   "out_channels": 4,
   "patch_size": 1,
   "patch_size_t": 1,
+  "perturbed_attn": false,
   "pos_embed_max_pos": 20,
   "qk_norm": "rms_norm_across_heads",
   "rope_double_precision": true,
   "rope_theta": 10000.0,
   "rope_type": "interleaved",
   "timestep_scale_multiplier": 1000,
+  "use_prompt_embeddings": true,
   "vae_scale_factors": [
     8,
     32,

transformer/diffusion_pytorch_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9ac96571eb5a10bf4a6756fe181f450e2efae73ac023f312556445ff83e7cd31
-size 1164152

 version https://git-lfs.github.com/spec/v1
+oid sha256:0ba703eed695c8dff6456e91b42ad4497b07b9d036870bb775e8074d907c36f0
+size 2364304

vocoder/config.json CHANGED Viewed

@@ -1,8 +1,14 @@
 {
   "_class_name": "LTX2Vocoder",
-  "_diffusers_version": "0.37.0.dev0",
   "hidden_channels": 32,
-  "in_channels": 32,
   "leaky_relu_negative_slope": 0.1,
   "out_channels": 2,
   "output_sampling_rate": 24000,

 {
   "_class_name": "LTX2Vocoder",
+  "_diffusers_version": "0.38.0.dev0",
+  "act_fn": "leaky_relu",
+  "antialias": false,
+  "antialias_kernel_size": 12,
+  "antialias_ratio": 2,
+  "final_act_fn": "tanh",
+  "final_bias": true,
   "hidden_channels": 32,
+  "in_channels": 128,
   "leaky_relu_negative_slope": 0.1,
   "out_channels": 2,
   "output_sampling_rate": 24000,

vocoder/diffusion_pytorch_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9c337dfbd31cccec2be3ebc22db65ee3058c5f344aa58793bc52e9ed1ae67523
-size 29740

 version https://git-lfs.github.com/spec/v1
+oid sha256:885a3ef39987df2a9840e963d611ec1ed4172af8532e84c772a8fdbb89335f17
+size 143448