Upload folder using huggingface_hub

b822524 verified about 1 year ago

4.57 kB

	import json
	import ast
	import os
	from openai import OpenAI


	def load_jsonl(jsonl_file_path):
	    """
	    Load a JSONL (JSON Lines) file, skipping lines that are not valid JSON.

	    Each line is stripped of surrounding whitespace; blank lines are ignored.
	    A line that fails to parse is reported on stdout and skipped, so a single
	    corrupt record does not abort the whole load. The decode error itself is
	    NOT propagated to the caller.

	    Args:
	        jsonl_file_path (str): Path to the .jsonl file.

	    Returns:
	        List[dict]: Parsed JSON values in file order (whatever json.loads
	        returns for each line; objects become dicts).

	    Raises:
	        FileNotFoundError: If the file does not exist.
	    """
	    if not os.path.exists(jsonl_file_path):
	        raise FileNotFoundError(f"The file {jsonl_file_path} does not exist.")

	    data = []
	    with open(jsonl_file_path, 'r', encoding='utf-8') as f:
	        for line in f:
	            line = line.strip()
	            if not line:  # Skip empty lines safely
	                continue
	            try:
	                record = json.loads(line)
	            except json.JSONDecodeError:
	                # Best-effort load: report the bad line and keep going.
	                print(f"Error decoding JSON on line: {line}")
	            else:
	                data.append(record)
	    return data


	def save_jsonl(path, data):
	    """
	    Save a list of dictionaries to a .jsonl file, one JSON document per line.

	    The file is opened in write mode (existing content is replaced) and
	    encoded as UTF-8; non-ASCII characters are written as-is rather than
	    escaped. Saving is best-effort: if the file cannot be opened or written,
	    the error is reported on stdout and the function returns normally, so
	    callers that need to know whether the save succeeded must check the
	    file themselves.

	    Args:
	        path (str): Output file path.
	        data (List[dict]): List of dictionaries to save.

	    Returns:
	        None
	    """
	    try:
	        with open(path, 'w', encoding='utf-8') as f:
	            f.writelines(
	                json.dumps(item, ensure_ascii=False) + '\n' for item in data
	            )
	    except IOError as e:
	        # Deliberately swallowed: report the failure instead of raising.
	        print(f"Failed to save to {path}: {e}")


	def parse_list(text):
	    """
	    Safely parse a string to a list of strings.

	    The text is evaluated with ast.literal_eval (Python literals only, no
	    arbitrary code). If it cannot be evaluated, or the result is not a list
	    whose items are all strings, the failure is reported on stdout and None
	    is returned.

	    Args:
	        text (str): The string to parse.

	    Returns:
	        List[str] or None: A list of strings if successful, otherwise None.
	    """
	    try:
	        result_list = ast.literal_eval(text)
	        if not (
	            isinstance(result_list, list)
	            and all(isinstance(item, str) for item in result_list)
	        ):
	            raise ValueError("Parsed response is not a list of strings.")
	    # literal_eval may raise any of these on malformed input (e.g. TypeError
	    # for an unhashable dict key, RecursionError for deep nesting); all of
	    # them mean "could not parse" and must not escape to the caller.
	    except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError) as e:
	        print(f"Failed to parse response: {text}. Error: {e}")
	        return None
	    return result_list


	def generate_questions(labels):
	    """
	    Build one yes/no presence question per detected object label.

	    Each label is inserted into the question verbatim; no article or other
	    text is added around it.

	    Args:
	        labels (List[str]): List of detected object labels.

	    Returns:
	        List[str]: One question per label, in input order, e.g.
	        ["Is cat in this video?"] for ["cat"].
	    """
	    questions = []
	    for name in labels:
	        questions.append(f"Is {name} in this video?")
	    return questions


	def build_spatial_question_prompt(object_list):
	    """
	    Builds a spatial question prompt based on an object list.

	    Reads the template from "prompt/prompt_generate_spatial_question.txt"
	    (resolved relative to the current working directory) and substitutes
	    every "{object_list}" placeholder with str(object_list).

	    Args:
	        object_list (List[dict]): List of objects detected in the video.

	    Returns:
	        str: Formatted prompt for generating spatial questions.

	    Raises:
	        OSError: If the template file cannot be opened (e.g. it is missing).
	    """
	    # Decode explicitly as UTF-8 so the prompt text does not depend on the
	    # platform's locale encoding (matches the other file I/O in this module).
	    with open("prompt/prompt_generate_spatial_question.txt", "r", encoding="utf-8") as f:
	        template = f.read()
	    prompt = template.replace("{object_list}", str(object_list))
	    return prompt


	def build_temporal_question_prompt(action_list):
	    """
	    Builds a temporal question prompt based on an action list.

	    Reads the template from "prompt/prompt_generate_temporal_question.txt"
	    (resolved relative to the current working directory) and substitutes
	    every "{action_list}" placeholder with str(action_list).

	    Args:
	        action_list (List[str]): List of actions detected in the video.

	    Returns:
	        str: Formatted prompt for generating temporal questions.

	    Raises:
	        OSError: If the template file cannot be opened (e.g. it is missing).
	    """
	    # Decode explicitly as UTF-8 so the prompt text does not depend on the
	    # platform's locale encoding (matches the other file I/O in this module).
	    with open("prompt/prompt_generate_temporal_question.txt", "r", encoding="utf-8") as f:
	        template = f.read()
	    prompt = template.replace("{action_list}", str(action_list))
	    return prompt


	def ask_gpt4o_mini(prompt):
	    """
	    Sends the prompt to GPT-4o-mini and gets the raw response text.

	    A new OpenAI client is created on every call with default arguments
	    (presumably picking up credentials from the environment -- confirm
	    against the deployment setup). The request uses temperature 0.1 and
	    max_output_tokens=128.

	    This function is best-effort: any exception raised while creating the
	    client, calling the API, or reading the response is reported on stdout
	    and NOT propagated. Callers must treat an empty string as "no answer".

	    Args:
	        prompt (str): The prompt to send to GPT-4o-mini.

	    Returns:
	        str: The response text with surrounding whitespace stripped, or ""
	        if the call failed for any reason.
	    """
	    try:
	        client = OpenAI()
	        response = client.responses.create(
	            model="gpt-4o-mini",
	            input=prompt,
	            temperature=0.1,
	            max_output_tokens=128,
	        )
	        return response.output_text.strip()
	    except Exception as e:
	        # Deliberate catch-all at the service boundary: a failed call must
	        # not abort the caller; it degrades to an empty answer instead.
	        print(f"Error with GPT-4o-mini API call: {e}")
	        return ""


	def filter_by_frame_cnt(object_info, frame_cnt_thres):
	    """
	    Keep the objects whose recorded count reaches a threshold.

	    Args:
	        object_info (dict): Must provide 'objects' (an iterable of object
	            ids) and 'object_counts' (a mapping from object id to count).
	            Ids missing from 'object_counts' are treated as count 0.
	        frame_cnt_thres: Minimum count (inclusive) an object needs to be kept.

	    Returns:
	        list: Ids from object_info['objects'], in their original order,
	        whose count is >= frame_cnt_thres.
	    """
	    return [
	        candidate
	        for candidate in object_info['objects']
	        # Look the counts mapping up per item and default absent ids to 0.
	        if object_info['object_counts'].get(candidate, 0) >= frame_cnt_thres
	    ]