burtenshaw HF Staff committed on
Commit
a6f5f80
·
verified ·
1 Parent(s): 283e370

Upload train_qwen3_codeforces.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. train_qwen3_codeforces.py +5 -0
train_qwen3_codeforces.py CHANGED
@@ -13,12 +13,17 @@ import trackio
13
  from datasets import load_dataset
14
  from peft import LoraConfig
15
  from trl import SFTTrainer, SFTConfig
 
16
 
17
  # Load dataset
18
  print("📦 Loading dataset...")
19
  dataset = load_dataset("open-r1/codeforces-cots", split="train")
20
  print(f"✅ Dataset loaded: {len(dataset)} examples")
21
 
 
 
 
 
22
  # Create train/eval split
23
  print("🔀 Creating train/eval split...")
24
  dataset_split = dataset.train_test_split(test_size=0.02, seed=42)
 
13
  from datasets import load_dataset
14
  from peft import LoraConfig
15
  from trl import SFTTrainer, SFTConfig
16
+ from transformers import AutoTokenizer
17
 
18
  # Load dataset
19
  print("📦 Loading dataset...")
20
  dataset = load_dataset("open-r1/codeforces-cots", split="train")
21
  print(f"✅ Dataset loaded: {len(dataset)} examples")
22
 
23
+ # Keep only the messages column (TRL SFT format)
24
+ dataset = dataset.select_columns(["messages"])
25
+ print(f"✅ Kept only 'messages' column")
26
+
27
  # Create train/eval split
28
  print("🔀 Creating train/eval split...")
29
  dataset_split = dataset.train_test_split(test_size=0.02, seed=42)