# VideoHallucination / utils.py
# Uploaded by Ruiyang-061X using huggingface_hub (commit b822524, verified).
import json
import ast
import os
from openai import OpenAI
def load_jsonl(jsonl_file_path):
    """
    Load a JSONL (JSON Lines) file.

    Blank lines are skipped. A line that is not valid JSON is reported on
    stdout and skipped, so one corrupt record does not abort the whole load.

    Args:
        jsonl_file_path (str): Path to the .jsonl file.

    Returns:
        list: Parsed JSON values, one per valid non-empty line, in file order.

    Raises:
        FileNotFoundError: If the file does not exist.
    """
    if not os.path.exists(jsonl_file_path):
        raise FileNotFoundError(f"The file {jsonl_file_path} does not exist.")

    data = []
    # 'utf-8-sig' decodes plain UTF-8 unchanged but also drops a leading BOM;
    # with plain 'utf-8' the BOM stays glued to the first line, json.loads
    # rejects it, and the first record is silently lost.
    with open(jsonl_file_path, 'r', encoding='utf-8-sig') as f:
        for line in f:
            line = line.strip()
            if not line:
                # Skip empty / whitespace-only lines.
                continue
            try:
                data.append(json.loads(line))
            except json.JSONDecodeError:
                # Best effort: report the bad line and keep going.
                print(f"Error decoding JSON on line: {line}")
    return data
def save_jsonl(path, data):
    """
    Write items to a .jsonl file, one JSON document per line.

    The file is opened for writing as UTF-8 (replacing any existing content)
    and non-ASCII characters are written unescaped. An IOError raised while
    opening or writing the file is caught and reported on stdout instead of
    being propagated to the caller.

    Args:
        path (str): Output file path.
        data (List[dict]): Items to serialize, one per output line.
    """
    try:
        with open(path, 'w', encoding='utf-8') as out_file:
            out_file.writelines(
                json.dumps(record, ensure_ascii=False) + '\n'
                for record in data
            )
    except IOError as err:
        print(f"Failed to save to {path}: {err}")
def parse_list(text):
    """
    Safely parse a string to a list. If parsing fails, return None and log the error.

    The text is evaluated with ``ast.literal_eval`` (literals only, no code
    execution) and accepted only if the result is a list whose items are all
    strings.

    Args:
        text (str): The string to parse.

    Returns:
        List[str] or None: A list of strings if successful, otherwise None.
    """
    try:
        parsed = ast.literal_eval(text)
        is_string_list = isinstance(parsed, list) and all(
            isinstance(item, str) for item in parsed
        )
        if not is_string_list:
            raise ValueError("Parsed response is not a list of strings.")
    # literal_eval may raise any of these on malformed input (e.g. TypeError
    # for an unhashable dict key such as "{[1]: 2}"); all of them count as
    # "parsing failed" and must yield None rather than escape to the caller.
    except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError) as e:
        print(f"Failed to parse response: {text}. Error: {e}")
        return None
    return parsed
def generate_questions(labels):
    """
    Build one yes/no presence question per label.

    Each label is inserted verbatim into the question text, so any article
    ("a", "an", "the") has to be part of the label itself.

    Args:
        labels (List[str]): List of detected object labels.

    Returns:
        List[str]: One question per label, in input order.
    """
    questions = []
    for label in labels:
        questions.append(f"Is {label} in this video?")
    return questions
def build_spatial_question_prompt(object_list):
    """
    Builds a spatial question prompt based on an object list.

    Reads the template from ``prompt/prompt_generate_spatial_question.txt``
    (resolved relative to the current working directory) and substitutes
    every ``{object_list}`` placeholder with ``str(object_list)``.

    Args:
        object_list (List[dict]): List of objects detected in the video.

    Returns:
        str: Formatted prompt for generating spatial questions.

    Raises:
        OSError: If the template file cannot be opened.
    """
    # Explicit UTF-8 keeps decoding independent of the platform locale and
    # matches the encoding used for the other file I/O in this module.
    with open("prompt/prompt_generate_spatial_question.txt", "r", encoding="utf-8") as f:
        template = f.read()
    return template.replace("{object_list}", str(object_list))
def build_temporal_question_prompt(action_list):
    """
    Builds a temporal question prompt based on an action list.

    Reads the template from ``prompt/prompt_generate_temporal_question.txt``
    (resolved relative to the current working directory) and substitutes
    every ``{action_list}`` placeholder with ``str(action_list)``.

    Args:
        action_list (List[str]): List of actions detected in the video.

    Returns:
        str: Formatted prompt for generating temporal questions.

    Raises:
        OSError: If the template file cannot be opened.
    """
    # Explicit UTF-8 keeps decoding independent of the platform locale and
    # matches the encoding used for the other file I/O in this module.
    with open("prompt/prompt_generate_temporal_question.txt", "r", encoding="utf-8") as f:
        template = f.read()
    return template.replace("{action_list}", str(action_list))
def ask_gpt4o_mini(prompt):
    """
    Send a prompt to GPT-4o-mini and return the text of its reply.

    A new ``OpenAI()`` client is created on every call. The request uses
    temperature 0.1 and caps the output at 128 tokens. Any exception raised
    while creating the client or making the request is caught, reported on
    stdout, and turned into an empty-string result.

    Args:
        prompt (str): The prompt to send to GPT-4o-mini.

    Returns:
        str: The response text with surrounding whitespace stripped, or ""
        if the call failed.
    """
    request_options = {
        "model": "gpt-4o-mini",
        "input": prompt,
        "temperature": 0.1,
        "max_output_tokens": 128,
    }
    try:
        api_response = OpenAI().responses.create(**request_options)
        answer = api_response.output_text
        return answer.strip()
    except Exception as err:
        # Deliberate catch-all: callers get "" instead of an exception.
        print(f"Error with GPT-4o-mini API call: {err}")
    return ""
def filter_by_frame_cnt(object_info, frame_cnt_thres):
    """
    Keep the objects whose recorded count reaches a threshold.

    Args:
        object_info (dict): Mapping with an 'objects' iterable of object ids
            and an 'object_counts' mapping from object id to count.
        frame_cnt_thres (int): Minimum count (inclusive) an object needs to
            be kept.

    Returns:
        list: Object ids from object_info['objects'], in their original
        order, whose count is >= frame_cnt_thres. Ids missing from
        'object_counts' are treated as having a count of 0.
    """
    # 'object_counts' is looked up per item on purpose: with no objects it is
    # never read at all.
    return [
        candidate
        for candidate in object_info['objects']
        if object_info['object_counts'].get(candidate, 0) >= frame_cnt_thres
    ]