OliverPerrin committed
Commit 60f8a12 · 1 Parent(s): 5e56615

Refactor: Fix B905 zip() strictness and resolve mypy error in evaluate.py

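For background on the B905 half of this change: since Python 3.10 (PEP 618), `zip()` accepts a `strict` keyword argument, and Ruff's B905 rule flags calls that leave it implicit. `strict=False` preserves the long-standing behaviour of truncating to the shortest iterable, but records that the truncation is intentional; `strict=True` raises `ValueError` on a length mismatch. A minimal sketch:

```python
# Minimal sketch of zip() strictness (Python 3.10+, PEP 618).
a = [1, 2, 3]
b = ["x", "y"]

print(list(zip(a, b)))                # [(1, 'x'), (2, 'y')] - silent truncation (B905 flags this)
print(list(zip(a, b, strict=False)))  # same result, truncation now explicit

try:
    list(zip(a, b, strict=True))      # raises once the shorter iterable is exhausted
except ValueError as exc:
    print(exc)                        # "zip() argument 2 is shorter than argument 1"
```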
scripts/eval_rouge.py CHANGED

```diff
@@ -165,7 +165,7 @@ def main() -> None:
         references = [item[1] for item in batch]
         predictions = pipeline.summarize(documents, max_length=args.max_length)
 
-        for reference, prediction in zip(references, predictions):
+        for reference, prediction in zip(references, predictions, strict=False):
             scores = scorer.score(reference, prediction)
             for metric_name, score in scores.items():
                 score_store[metric_name]["precision"].append(score.precision)
```
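The surrounding loop matches the interface of the `rouge_score` package (a `score(reference, prediction)` call returning per-metric tuples with a `.precision` field), so a self-contained sketch of the pattern, assuming that library, looks like:

```python
# A self-contained sketch of the loop above, assuming `scorer` is a
# rouge_score RougeScorer (its score()/.precision interface matches).
from collections import defaultdict

from rouge_score import rouge_scorer

scorer = rouge_scorer.RougeScorer(["rouge1", "rougeL"], use_stemmer=True)
score_store: dict = defaultdict(lambda: defaultdict(list))

references = ["the cat sat on the mat"]
predictions = ["a cat sat on the mat"]

for reference, prediction in zip(references, predictions, strict=False):
    scores = scorer.score(reference, prediction)  # {"rouge1": Score(...), ...}
    for metric_name, score in scores.items():
        score_store[metric_name]["precision"].append(score.precision)

print(dict(score_store["rouge1"]))
```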
scripts/evaluate.py CHANGED

```diff
@@ -9,7 +9,7 @@ import argparse
 import json
 import sys
 from pathlib import Path
-from typing import List
+from typing import Any, List, cast
 
 import torch
 from sklearn.preprocessing import MultiLabelBinarizer
@@ -44,13 +44,13 @@ SPLIT_ALIASES = {
 }
 
 
-def _read_split(root: Path, split: str, loader) -> list:
+def _read_split(root: Path, split: str, loader) -> List[Any]:
     aliases = SPLIT_ALIASES.get(split, (split,))
     for alias in aliases:
         for ext in ("jsonl", "json"):
             candidate = root / f"{alias}.{ext}"
             if candidate.exists():
-                return loader(str(candidate))
+                return cast(List[Any], loader(str(candidate)))
     raise FileNotFoundError(f"Missing {split} split under {root}")
 
 
@@ -152,7 +152,7 @@ def main() -> None:
         inputs = [example.text for example in batch]
         predictions = pipeline.predict_emotions(inputs)
         target_matrix = emotion_binarizer.transform([list(example.emotions) for example in batch])
-        for pred, target_row in zip(predictions, target_matrix):
+        for pred, target_row in zip(predictions, target_matrix, strict=False):
            vector = torch.zeros(len(metadata.emotion), dtype=torch.float32)
            for label in pred.labels:
                idx = label_to_index.get(label)
```
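The `cast` half of the commit is worth spelling out: `loader` is an unannotated parameter, so `loader(str(candidate))` is typed `Any`, and returning `Any` from a function now declared `-> List[Any]` trips mypy (e.g. under `warn_return_any`). `typing.cast` is a runtime no-op that only tells the checker what type to assume. A stripped-down sketch:

```python
# Runtime no-op, type-checker hint: cast() resolves the "returning Any"
# complaint without changing behaviour. `_load` stands in for the
# unannotated `loader` callable.
from typing import Any, List, cast


def _load(path: str):  # deliberately unannotated, like `loader`
    return [{"text": "hello"}]


def read_split(path: str) -> List[Any]:
    # Without cast(), mypy sees the return value as Any.
    return cast(List[Any], _load(path))
```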
scripts/preprocess_data.py CHANGED

```diff
@@ -282,8 +282,8 @@ def preprocess_topic(
         random_state=seed,
         stratify=topics,
     )
-    train_records = list(zip(train_texts, train_topics))
-    val_rows = list(zip(val_texts, val_topics))
+    train_records = list(zip(train_texts, train_topics, strict=False))
+    val_rows = list(zip(val_texts, val_topics, strict=False))
 
     def to_records(pairs: Sequence[Tuple[str, str]]) -> Iterator[Dict[str, object]]:
         for text, topic in pairs:
```
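In this spot the two zipped lists come straight out of `train_test_split`, which returns parallel train/validation slices of equal length, so `strict=True` would also hold; `strict=False` simply satisfies B905 without changing behaviour. An illustrative sketch (the data values are made up):

```python
# Illustrative only: train_test_split returns parallel lists of equal
# length, so zipping them back into (text, topic) records never truncates.
from sklearn.model_selection import train_test_split

texts = [f"doc {i}" for i in range(20)]
topics = ["news", "sport"] * 10

train_texts, val_texts, train_topics, val_topics = train_test_split(
    texts, topics, test_size=0.25, random_state=13, stratify=topics
)
train_records = list(zip(train_texts, train_topics, strict=False))
print(len(train_records))  # 15
```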
src/inference/pipeline.py CHANGED

```diff
@@ -132,7 +132,7 @@ class InferencePipeline:
         for row in probs.cpu():
             pairs = [
                 (label, score)
-                for label, score in zip(self.emotion_labels, row.tolist())
+                for label, score in zip(self.emotion_labels, row.tolist(), strict=False)
                 if score >= decision_threshold
             ]
             labels = [label for label, _ in pairs]
```
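The comprehension above is the usual multi-label thresholding pattern: pair each label with its probability and keep the ones above the cutoff. A runnable sketch with made-up labels and scores:

```python
# A self-contained sketch of the thresholding comprehension above.
import torch

emotion_labels = ["joy", "anger", "fear"]   # illustrative labels
probs = torch.tensor([[0.91, 0.12, 0.55]])  # one row of probabilities per input
decision_threshold = 0.5

for row in probs.cpu():
    pairs = [
        (label, score)
        for label, score in zip(emotion_labels, row.tolist(), strict=False)
        if score >= decision_threshold
    ]
    labels = [label for label, _ in pairs]
    print(labels)  # ['joy', 'fear']
```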
src/models/factory.py CHANGED

```diff
@@ -81,7 +81,9 @@ def _load_pretrained_weights(
     # Skip positional encoding - BART uses learned positions, I use sinusoidal
     # implementation will work fine with sinusoidal encodings
 
-    for _i, (custom_layer, bart_layer) in enumerate(zip(encoder.layers, bart.encoder.layers)):
+    for _i, (custom_layer, bart_layer) in enumerate(
+        zip(encoder.layers, bart.encoder.layers, strict=False)
+    ):
         # Self-attention
         custom_layer.self_attn.W_Q.weight.data.copy_(bart_layer.self_attn.q_proj.weight.data)
         custom_layer.self_attn.W_Q.bias.data.copy_(bart_layer.self_attn.q_proj.bias.data)
@@ -115,7 +117,9 @@ def _load_pretrained_weights(
     decoder.embedding.weight.data.copy_(bart.decoder.embed_tokens.weight.data)
     # Skip positional encoding - BART uses learned positions, we use sinusoidal
 
-    for _i, (custom_layer, bart_layer) in enumerate(zip(decoder.layers, bart.decoder.layers)):
+    for _i, (custom_layer, bart_layer) in enumerate(
+        zip(decoder.layers, bart.decoder.layers, strict=False)
+    ):
         # Self-attention
         custom_layer.self_attn.W_Q.weight.data.copy_(bart_layer.self_attn.q_proj.weight.data)
         custom_layer.self_attn.W_Q.bias.data.copy_(bart_layer.self_attn.q_proj.bias.data)
```
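One note on these encoder/decoder loops: with `strict=False`, if the custom stack and BART's stack ever had different depths, the extra layers would be skipped silently rather than raising, so the depths are effectively trusted to match. A toy sketch of the pairing-and-copy pattern (plain `nn.Linear` layers stand in for the transformer layers):

```python
# Toy sketch: zip the two layer stacks and copy weights in place.
# With strict=False, the deeper stack's surplus layers are skipped silently.
import torch
from torch import nn

custom_layers = nn.ModuleList(nn.Linear(4, 4) for _ in range(2))
pretrained_layers = nn.ModuleList(nn.Linear(4, 4) for _ in range(3))

for _i, (custom_layer, src_layer) in enumerate(
    zip(custom_layers, pretrained_layers, strict=False)
):
    custom_layer.weight.data.copy_(src_layer.weight.data)
    custom_layer.bias.data.copy_(src_layer.bias.data)

print(torch.equal(custom_layers[0].weight, pretrained_layers[0].weight))  # True
```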
src/training/metrics.py CHANGED

```diff
@@ -28,7 +28,7 @@ def rouge_like(predictions: Sequence[str], references: Sequence[str]) -> float:
     if not predictions or not references:
         return 0.0
     scores = []
-    for pred, ref in zip(predictions, references):
+    for pred, ref in zip(predictions, references, strict=False):
        pred_tokens = pred.split()
        ref_tokens = ref.split()
        if not ref_tokens:
@@ -46,7 +46,7 @@ def calculate_bleu(predictions: Sequence[str], references: Sequence[str]) -> float:
 
     smoother = SmoothingFunction().method1
     scores = []
-    for pred, ref in zip(predictions, references):
+    for pred, ref in zip(predictions, references, strict=False):
        pred_tokens = pred.split()
        ref_tokens = [ref.split()]  # BLEU expects list of references
        scores.append(sentence_bleu(ref_tokens, pred_tokens, smoothing_function=smoother))
```
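For completeness, the BLEU loop above runs as-is with NLTK; `SmoothingFunction().method1` avoids zero scores when a higher-order n-gram has no overlap. A minimal runnable version:

```python
# Minimal runnable version of the BLEU loop above, using NLTK with
# method1 smoothing as in the diff.
from nltk.translate.bleu_score import SmoothingFunction, sentence_bleu

predictions = ["the cat sat on the mat"]
references = ["the cat is on the mat"]

smoother = SmoothingFunction().method1
scores = []
for pred, ref in zip(predictions, references, strict=False):
    pred_tokens = pred.split()
    ref_tokens = [ref.split()]  # sentence_bleu expects a list of references
    scores.append(sentence_bleu(ref_tokens, pred_tokens, smoothing_function=smoother))

print(sum(scores) / len(scores))
```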