Spaces:
Sleeping
Sleeping
| import logging | |
| import re | |
| from openpyxl import load_workbook | |
| # Debug Mode (Set to False for production) | |
| DEBUG_MODE = True | |
| # Logging Configuration | |
| logging.basicConfig( | |
| level=logging.DEBUG if DEBUG_MODE else logging.INFO, | |
| format="%(asctime)s [%(levelname)s] %(message)s", | |
| handlers=[ | |
| logging.FileHandler("debug.log"), | |
| logging.StreamHandler() | |
| ] | |
| ) | |
| def process_xlsx(file_path): | |
| logging.info(f"Processing XLSX file: {file_path}") | |
| try: | |
| # Load XLSX content | |
| wb = load_workbook(filename=file_path) | |
| sheet = wb.active | |
| xlsx_content = [[cell.value for cell in row] for row in sheet.rows] | |
| # Initialization | |
| column_names = [] | |
| department_program_courses = {} | |
| current_department = None | |
| current_program = None | |
| # Determine column indices | |
| header_index = 0 | |
| while header_index < len(xlsx_content): | |
| if "Course Code" in [x for x in xlsx_content[header_index] if x]: | |
| break | |
| header_index += 1 | |
| column_names = xlsx_content[header_index] | |
| cr_index = [i for i, x in enumerate(column_names) if re.match(r"Cr", str(x))] | |
| if not cr_index: | |
| logging.error("Could not find 'Cr' column index.") | |
| return None | |
| cr_index = cr_index[0] | |
| # Process rows | |
| for index, row in enumerate(xlsx_content): | |
| if index <= header_index: | |
| continue | |
| # Department Row Detection (Loose pattern for "Cr") | |
| if row[0] and row[cr_index] and re.match(r"cr", str(row[cr_index]), re.IGNORECASE): | |
| current_department = row[0] | |
| department_program_courses.setdefault(current_department, {}) | |
| current_program = None | |
| logging.debug(f"Detected Department: {current_department}") | |
| # Program Row Detection (Empty "Cr" column) | |
| elif row[0] and not row[cr_index]: | |
| current_program = row[0] | |
| department_program_courses[current_department].setdefault(current_program, []) | |
| logging.debug(f"Detected Program under {current_department}: {current_program}") | |
| # Course Row Detection (Numeric "Cr" value) | |
| elif row[0] and isinstance(row[cr_index], (int, float)): | |
| course_codes = [row[0]] # Default to single course code | |
| # Handle special case (e.g., "STA421/521") | |
| if "/" in row[0]: | |
| start, end = row[0].split("/") | |
| course_codes = [start, start[:3] + end] | |
| logging.info(f"Splitting course for row: {repr(row)}") | |
| logging.info(f"course_codes: {course_codes}") | |
| for code in course_codes: | |
| new_row = row[:] # Copy original row | |
| new_row[0] = code # Update course code for each split course | |
| # Assign courses to program if exists, otherwise directly to department | |
| if current_program: | |
| department_program_courses[current_department][current_program].append(new_row) | |
| else: | |
| department_program_courses[current_department].setdefault(current_department, []).append(new_row) | |
| logging.debug(f"Added Course(s) {course_codes} under {current_program or 'directly in department'} in {current_department}") | |
| elif row[0]: | |
| logging.info(f"Skipping row: {repr(row)}") | |
| return (column_names, department_program_courses) | |
| except Exception as e: | |
| logging.error(f"An error occurred: {str(e)}") | |
| return None | |
| if __name__ == "__main__": | |
| file_path = "data/FTCM_Course_List_Spring2025.xlsx" | |
| result = process_xlsx(file_path) | |
| if result: | |
| column_names, department_program_courses = result | |
| print("Column Names:") | |
| # Modify column names to have no spaces and no line breaks | |
| modified_column_names = [name.replace(' ', '').replace('\n', '') if name else name for name in column_names] | |
| print(modified_column_names) | |
| print("\nDepartment, Program, Courses:") | |
| for department, programs in department_program_courses.items(): | |
| print(f"**Department: {department}**") | |
| for program, courses in programs.items(): | |
| print(f" Program: *{program}") | |
| for course in courses: | |
| print(f" - Course: {course}") | |
| else: | |
| print("Failed to process XLSX file") |