Commit 60f8a12 (1 parent: 5e56615)
Refactor: Fix B905 zip() strictness and resolve mypy error in evaluate.py
Files changed:
- scripts/eval_rouge.py +1 -1
- scripts/evaluate.py +4 -4
- scripts/preprocess_data.py +2 -2
- src/inference/pipeline.py +1 -1
- src/models/factory.py +6 -2
- src/training/metrics.py +2 -2
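For context: B905 is the Ruff lint rule that flags zip() calls without an explicit strict= argument (the flag exists since Python 3.10). This commit opts into strict=False everywhere, which keeps the old truncating behavior while satisfying the linter. A minimal standalone sketch of the difference, with toy data not taken from this repo:

    a = [1, 2, 3]
    b = ["x", "y"]

    # strict=False (the choice in this commit) silently truncates to the
    # shorter input, exactly like a bare zip(a, b).
    print(list(zip(a, b, strict=False)))  # [(1, 'x'), (2, 'y')]

    # strict=True raises instead of hiding a length mismatch.
    try:
        list(zip(a, b, strict=True))
    except ValueError as err:
        print(err)  # zip() argument 2 is shorter than argument 1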
scripts/eval_rouge.py

@@ -165,7 +165,7 @@ def main() -> None:
         references = [item[1] for item in batch]
         predictions = pipeline.summarize(documents, max_length=args.max_length)

-        for reference, prediction in zip(references, predictions):
+        for reference, prediction in zip(references, predictions, strict=False):
             scores = scorer.score(reference, prediction)
             for metric_name, score in scores.items():
                 score_store[metric_name]["precision"].append(score.precision)
scripts/evaluate.py

@@ -9,7 +9,7 @@ import argparse
 import json
 import sys
 from pathlib import Path
-from typing import List
+from typing import Any, List, cast

 import torch
 from sklearn.preprocessing import MultiLabelBinarizer

@@ -44,13 +44,13 @@ SPLIT_ALIASES = {
 }


-def _read_split(root: Path, split: str, loader) -> List:
+def _read_split(root: Path, split: str, loader) -> List[Any]:
     aliases = SPLIT_ALIASES.get(split, (split,))
     for alias in aliases:
         for ext in ("jsonl", "json"):
             candidate = root / f"{alias}.{ext}"
             if candidate.exists():
-                return loader(str(candidate))
+                return cast(List[Any], loader(str(candidate)))
     raise FileNotFoundError(f"Missing {split} split under {root}")


@@ -152,7 +152,7 @@ def main() -> None:
         inputs = [example.text for example in batch]
         predictions = pipeline.predict_emotions(inputs)
         target_matrix = emotion_binarizer.transform([list(example.emotions) for example in batch])
-        for pred, target_row in zip(predictions, target_matrix):
+        for pred, target_row in zip(predictions, target_matrix, strict=False):
             vector = torch.zeros(len(metadata.emotion), dtype=torch.float32)
             for label in pred.labels:
                 idx = label_to_index.get(label)
scripts/preprocess_data.py

@@ -282,8 +282,8 @@ def preprocess_topic(
         random_state=seed,
         stratify=topics,
     )
-    train_records = list(zip(train_texts, train_topics))
-    val_rows = list(zip(val_texts, val_topics))
+    train_records = list(zip(train_texts, train_topics, strict=False))
+    val_rows = list(zip(val_texts, val_topics, strict=False))

     def to_records(pairs: Sequence[Tuple[str, str]]) -> Iterator[Dict[str, object]]:
         for text, topic in pairs:
src/inference/pipeline.py

@@ -132,7 +132,7 @@ class InferencePipeline:
         for row in probs.cpu():
             pairs = [
                 (label, score)
-                for label, score in zip(self.emotion_labels, row.tolist())
+                for label, score in zip(self.emotion_labels, row.tolist(), strict=False)
                 if score >= decision_threshold
             ]
             labels = [label for label, _ in pairs]
src/models/factory.py

@@ -81,7 +81,9 @@ def _load_pretrained_weights(
     # Skip positional encoding - BART uses learned positions, I use sinusoidal
     # implementation will work fine with sinusoidal encodings

-    for _i, (custom_layer, bart_layer) in enumerate(zip(encoder.layers, bart.encoder.layers)):
+    for _i, (custom_layer, bart_layer) in enumerate(
+        zip(encoder.layers, bart.encoder.layers, strict=False)
+    ):
         # Self-attention
         custom_layer.self_attn.W_Q.weight.data.copy_(bart_layer.self_attn.q_proj.weight.data)
         custom_layer.self_attn.W_Q.bias.data.copy_(bart_layer.self_attn.q_proj.bias.data)

@@ -115,7 +117,9 @@ def _load_pretrained_weights(
     decoder.embedding.weight.data.copy_(bart.decoder.embed_tokens.weight.data)
     # Skip positional encoding - BART uses learned positions, we use sinusoidal

-    for _i, (custom_layer, bart_layer) in enumerate(zip(decoder.layers, bart.decoder.layers)):
+    for _i, (custom_layer, bart_layer) in enumerate(
+        zip(decoder.layers, bart.decoder.layers, strict=False)
+    ):
         # Self-attention
         custom_layer.self_attn.W_Q.weight.data.copy_(bart_layer.self_attn.q_proj.weight.data)
         custom_layer.self_attn.W_Q.bias.data.copy_(bart_layer.self_attn.q_proj.bias.data)
src/training/metrics.py

@@ -28,7 +28,7 @@ def rouge_like(predictions: Sequence[str], references: Sequence[str]) -> float:
     if not predictions or not references:
         return 0.0
     scores = []
-    for pred, ref in zip(predictions, references):
+    for pred, ref in zip(predictions, references, strict=False):
         pred_tokens = pred.split()
         ref_tokens = ref.split()
         if not ref_tokens:

@@ -46,7 +46,7 @@ def calculate_bleu(predictions: Sequence[str], references: Sequence[str]) -> float:

     smoother = SmoothingFunction().method1
     scores = []
-    for pred, ref in zip(predictions, references):
+    for pred, ref in zip(predictions, references, strict=False):
         pred_tokens = pred.split()
         ref_tokens = [ref.split()]  # BLEU expects list of references
         scores.append(sentence_bleu(ref_tokens, pred_tokens, smoothing_function=smoother))