import json
import ast
import os

try:
    from openai import OpenAI
except ImportError:
    # The SDK is only needed by ask_gpt4o_mini(); keep the pure helpers in
    # this module importable when it is not installed.
    OpenAI = None


def load_jsonl(jsonl_file_path):
    """
    Load a JSONL (JSON Lines) file.

    Blank lines are skipped. A line that is not valid JSON is reported with
    ``print`` and skipped; it does not abort the load.

    Args:
        jsonl_file_path (str): Path to the .jsonl file.

    Returns:
        list: The parsed JSON value of every valid, non-empty line, in file
            order.

    Raises:
        FileNotFoundError: If the file does not exist.
    """
    if not os.path.exists(jsonl_file_path):
        raise FileNotFoundError(f"The file {jsonl_file_path} does not exist.")

    data = []
    with open(jsonl_file_path, 'r', encoding='utf-8') as f:
        for raw_line in f:
            line = raw_line.strip()
            if not line:
                continue  # Skip empty lines safely
            try:
                data.append(json.loads(line))
            except json.JSONDecodeError:
                print(f"Error decoding JSON on line: {line}")
    return data


def save_jsonl(path, data):
    """
    Save an iterable of JSON-serializable items to a .jsonl file.

    Each item is written as one JSON document per line, UTF-8 encoded, with
    non-ASCII characters kept as-is (``ensure_ascii=False``).

    I/O failures are handled best-effort: an ``IOError`` raised while opening
    or writing the file is reported with ``print`` and NOT re-raised.
    Serialization errors from ``json.dumps`` are not caught here.

    Args:
        path (str): Output file path. An existing file is overwritten.
        data (List[dict]): Items to save.
    """
    try:
        with open(path, 'w', encoding='utf-8') as f:
            f.writelines(
                json.dumps(item, ensure_ascii=False) + '\n' for item in data
            )
    except IOError as e:
        print(f"Failed to save to {path}: {e}")


def parse_list(text):
    """
    Safely parse a string holding a Python list literal of strings.

    If parsing fails, or the parsed value is not a list whose items are all
    strings, the problem is reported with ``print`` and ``None`` is returned.

    Args:
        text (str): The string to parse.

    Returns:
        List[str] or None: A list of strings if successful, otherwise None.
    """
    try:
        parsed = ast.literal_eval(text)
        is_str_list = isinstance(parsed, list) and all(
            isinstance(item, str) for item in parsed
        )
        if not is_str_list:
            raise ValueError("Parsed response is not a list of strings.")
    # literal_eval is documented to raise any of these on malformed input
    # (e.g. TypeError for "{[1]: 2}", RecursionError for deep nesting).
    except (ValueError, SyntaxError, TypeError,
            MemoryError, RecursionError) as e:
        print(f"Failed to parse response: {text}. Error: {e}")
        return None
    return parsed


def generate_questions(labels):
    """
    Generate a yes/no presence question for each detected object label.

    The label is inserted verbatim, so it should already carry its article
    (e.g. ``"a cat"`` gives ``"Is a cat in this video?"``).

    Args:
        labels (List[str]): List of detected object labels.

    Returns:
        List[str]: One question per label, in the same order.
    """
    return [f"Is {label} in this video?" for label in labels]


def _render_prompt_template(template_path, placeholder, value):
    """Read a UTF-8 template file and replace ``placeholder`` with ``str(value)``."""
    with open(template_path, "r", encoding="utf-8") as f:
        template = f.read()
    return template.replace(placeholder, str(value))


def build_spatial_question_prompt(object_list):
    """
    Build the spatial-question prompt for an object list.

    Reads ``prompt/prompt_generate_spatial_question.txt`` (resolved against
    the current working directory) and substitutes ``str(object_list)`` for
    every ``{object_list}`` placeholder.

    Args:
        object_list (List[dict]): List of objects detected in the video.

    Returns:
        str: Formatted prompt for generating spatial questions.

    Raises:
        OSError: If the template file cannot be opened or read.
    """
    return _render_prompt_template(
        "prompt/prompt_generate_spatial_question.txt",
        "{object_list}",
        object_list,
    )


def build_temporal_question_prompt(action_list):
    """
    Build the temporal-question prompt for an action list.

    Reads ``prompt/prompt_generate_temporal_question.txt`` (resolved against
    the current working directory) and substitutes ``str(action_list)`` for
    every ``{action_list}`` placeholder.

    Args:
        action_list (List[str]): List of actions detected in the video.

    Returns:
        str: Formatted prompt for generating temporal questions.

    Raises:
        OSError: If the template file cannot be opened or read.
    """
    return _render_prompt_template(
        "prompt/prompt_generate_temporal_question.txt",
        "{action_list}",
        action_list,
    )


def ask_gpt4o_mini(prompt):
    """
    Send the prompt to GPT-4o-mini and return the raw text response.

    The request uses ``temperature=0.1`` and limits the reply to 128 output
    tokens. Any error raised while creating the client or calling the API is
    reported with ``print`` and an empty string is returned instead.

    Args:
        prompt (str): The prompt to send to GPT-4o-mini.

    Returns:
        str: The stripped response text, or ``""`` if the call failed.

    Raises:
        ImportError: If the ``openai`` package is not installed.
    """
    if OpenAI is None:
        # Fail loudly: silently returning "" for every prompt would hide a
        # missing dependency behind what looks like empty model output.
        raise ImportError(
            "The 'openai' package is required to call ask_gpt4o_mini()."
        )

    try:
        client = OpenAI()
        response = client.responses.create(
            model="gpt-4o-mini",
            input=prompt,
            temperature=0.1,
            max_output_tokens=128,
        )
        return response.output_text.strip()
    except Exception as e:
        print(f"Error with GPT-4o-mini API call: {e}")
        return ""


def filter_by_frame_cnt(object_info, frame_cnt_thres):
    """
    Keep the objects whose count reaches a threshold.

    Args:
        object_info (dict): Must provide ``'objects'`` (an iterable of object
            ids) and ``'object_counts'`` (a mapping from object id to a
            count). Ids missing from ``'object_counts'`` count as 0.
        frame_cnt_thres (int): Minimum count (inclusive) an object needs to
            be kept.

    Returns:
        list: The ids from ``object_info['objects']`` whose count is
            ``>= frame_cnt_thres``, in their original order.
    """
    counts = object_info['object_counts']
    return [
        obj_id
        for obj_id in object_info['objects']
        if counts.get(obj_id, 0) >= frame_cnt_thres
    ]