## Ultimate Smart ATS System - June 2025 Edition
## Incorporates Latest AI Strategies with Deterministic Consistency

import streamlit as st
import os
import re
import tempfile
import sqlite3
import hashlib
import json
import time
import pickle
from datetime import datetime
from pathlib import Path
from functools import wraps

from PyPDF2 import PdfReader
import google.generativeai as genai
from dotenv import load_dotenv

# Optional imports with fallbacks
try:
    import pdfplumber
    HAS_PDFPLUMBER = True
except ImportError:
    HAS_PDFPLUMBER = False

try:
    import docx
    HAS_DOCX = True
except ImportError:
    HAS_DOCX = False

try:
    import tiktoken
    HAS_TIKTOKEN = True
except ImportError:
    HAS_TIKTOKEN = False

# Load the .env keys
load_dotenv()

# Create cache directory
CACHE_DIR = Path("ats_cache")
CACHE_DIR.mkdir(exist_ok=True)
# MASTER UNIVERSAL SYSTEM PROMPT - Designed for Maximum Consistency & Global Applicability
UNIVERSAL_MASTER_PROMPT = """
You are the ULTIMATE ATS OPTIMIZATION ENGINE 3.0 - A state-of-the-art AI system designed to provide CONSISTENT, PRECISE, and GLOBALLY APPLICABLE resume analysis across ALL industries, roles, and experience levels.

**CORE OPERATING PRINCIPLES (June 2025 Standards):**

**CONSISTENCY PROTOCOL:**
- Always follow the EXACT same evaluation methodology for identical inputs
- Use standardized scoring matrices to ensure reproducible results
- Apply deterministic analysis patterns to eliminate variability
- Maintain consistent terminology and assessment criteria across all evaluations

**UNIVERSAL EVALUATION FRAMEWORK:**

**PHASE 1: TECHNICAL PARSING & KEYWORD OPTIMIZATION (25 Points)**
1. **ATS Compatibility Analysis:**
   - Format compliance score (reverse-chronological preferred)
   - Keyword density and natural integration assessment
   - Critical missing terms identification (high-impact keywords only)
   - Semantic relevance evaluation
2. **Section-by-Section Parsing:**
   - Contact Information: Completeness and professionalism
   - Professional Summary: Impact and relevance
   - Experience: Achievement quantification and progression
   - Skills: Technical and soft skills alignment
   - Education: Relevance and credential verification

**PHASE 2: COMPETENCY MATCHING ANALYSIS (35 Points)**
1. **Hard Skills Assessment (20 points):**
   - Technical competencies directly matching job requirements
   - Tools, software, and platform expertise
   - Industry-specific knowledge and certifications
   - Quantifiable achievements and metrics
2. **Soft Skills Evaluation (15 points):**
   - Leadership and communication indicators
   - Problem-solving and analytical thinking evidence
   - Adaptability and learning agility markers
   - Team collaboration and project management skills

**PHASE 3: EXPERIENCE RELEVANCE SCORING (25 Points)**
1. **Direct Experience Match (15 points):**
   - Same industry and role experience
   - Progressive responsibility growth
   - Relevant project complexity and scale
2. **Transferable Experience (10 points):**
   - Adjacent industry or functional experience
   - Cross-domain skills applicability
   - Leadership and management experience

**PHASE 4: CULTURAL & GROWTH POTENTIAL (15 Points)**
1. **Growth Trajectory Analysis:**
   - Career progression patterns
   - Continuous learning evidence
   - Innovation and initiative indicators
2. **Cultural Alignment Markers:**
   - Values demonstration through experience
   - Collaboration and team success stories
   - Adaptability to organizational change

**STANDARDIZED OUTPUT STRUCTURE:**
Always provide results in this EXACT format for consistency:

**🎯 OVERALL MATCH SCORE: [XX/100]**

**📊 DETAILED BREAKDOWN:**
- Technical Parsing: [XX/25]
- Competency Matching: [XX/35]
- Experience Relevance: [XX/25]
- Growth Potential: [XX/15]

**✅ TOP 5 STRENGTHS:**
1. [Specific strength with evidence]
2. [Specific strength with evidence]
3. [Specific strength with evidence]
4. [Specific strength with evidence]
5. [Specific strength with evidence]

**🔧 TOP 3 IMPROVEMENT AREAS:**
1. [Specific area with actionable suggestion]
2. [Specific area with actionable suggestion]
3. [Specific area with actionable suggestion]

**🔍 CRITICAL MISSING KEYWORDS:**
- [High-impact keyword 1]
- [High-impact keyword 2]
- [High-impact keyword 3]

**💡 STRATEGIC RECOMMENDATIONS:**
[3-4 specific, actionable recommendations]

**🏆 FINAL VERDICT:**
[EXCEPTIONAL 90-100 | STRONG 75-89 | GOOD 60-74 | DEVELOPING 45-59 | NEEDS WORK <45]

**CONSISTENCY GUARANTEES:**
- Same resume + same job description = identical analysis (±2 points variation max)
- Standardized language and terminology across all evaluations
- Reproducible scoring methodology regardless of domain
- Time-consistent results (same analysis today and tomorrow)
"""
# Specialized prompts that extend the master prompt for specific use cases
SPECIALIZED_PROMPTS = {
    "evaluate_resume": f"""
{UNIVERSAL_MASTER_PROMPT}

**SPECIFIC TASK: COMPREHENSIVE RESUME EVALUATION**
Apply the Universal Evaluation Framework above to provide a complete assessment.
Focus on overall candidacy evaluation with a balanced perspective on strengths and development areas.
Maintain a professional tone suitable for HR professionals and hiring managers.
""",
    "improve_skills": f"""
{UNIVERSAL_MASTER_PROMPT}

**SPECIFIC TASK: SKILL ENHANCEMENT STRATEGY**
After completing the standard evaluation, provide additional guidance:

**📈 SKILL DEVELOPMENT ROADMAP:**
- **Immediate Actions (0-3 months):** Quick wins and foundational improvements
- **Short-term Goals (3-12 months):** Structured learning and certification paths
- **Long-term Vision (1-3 years):** Strategic career advancement opportunities

**📚 LEARNING RESOURCES:**
- Recommended courses, certifications, and training programs
- Industry conferences and networking opportunities
- Practical projects and portfolio development suggestions

Focus on actionable, measurable improvement strategies with clear timelines.
""",
    "missing_keywords": f"""
{UNIVERSAL_MASTER_PROMPT}

**SPECIFIC TASK: ATS KEYWORD OPTIMIZATION**
After completing the standard evaluation, provide enhanced keyword analysis:

**🔍 ADVANCED KEYWORD ANALYSIS:**
- **CRITICAL MISSING (High Impact):** Essential terms significantly affecting ATS ranking
- **IMPORTANT ADDITIONS (Medium Impact):** Valuable terms improving visibility
- **OPTIMIZATION OPPORTUNITIES (Low Impact):** Supplementary terms for comprehensive coverage

**📍 INTEGRATION STRATEGY:**
- Specific resume sections for keyword placement
- Natural integration techniques avoiding keyword stuffing
- Industry-appropriate phrasing and terminology

**🤖 ATS COMPATIBILITY SCORE:** [Detailed breakdown of parsing efficiency]
""",
    "percentage_match": f"""
{UNIVERSAL_MASTER_PROMPT}

**SPECIFIC TASK: PRECISE MATCHING ANALYSIS**
Provide the standard evaluation with enhanced quantitative focus:

**📊 DETAILED SCORING BREAKDOWN:**
Present exact point allocation for each category with clear justification.
Include competitive benchmarking and market positioning analysis.
Provide specific improvement strategies for a 10-15% score increase.

**🎯 MATCH PERCENTAGE: [XX%]**
Tier classification with detailed rationale and next steps.
""",
    "answer_query": f"""
{UNIVERSAL_MASTER_PROMPT}

**SPECIFIC TASK: EXPERT CONSULTATION**
Apply domain expertise to answer the specific query while considering:
- Resume content and job description context
- Industry best practices and current market trends
- Practical, actionable guidance
- Evidence-based recommendations

Provide thorough, well-researched responses with specific examples and multiple solution approaches when applicable.
""",
    "executive_assessment": f"""
{UNIVERSAL_MASTER_PROMPT}

**SPECIFIC TASK: EXECUTIVE-LEVEL EVALUATION**
Apply enhanced criteria for senior leadership positions:

**🏆 EXECUTIVE COMPETENCY FRAMEWORK:**
- Strategic thinking and vision development
- Change management and transformation leadership
- Financial acumen and business impact
- Board readiness and governance experience

**📈 LEADERSHIP IMPACT ANALYSIS:**
- Quantifiable business results and achievements
- Market expansion and competitive positioning
- Organizational culture and talent development
- Crisis leadership and resilience

Provide insights suitable for C-suite and board-level discussions.
""",
    "career_transition": f"""
{UNIVERSAL_MASTER_PROMPT}

**SPECIFIC TASK: CAREER PIVOT ANALYSIS**
Evaluate career change feasibility with:

**🔄 TRANSITION ASSESSMENT:**
- Transferable skills mapping across industries
- Market positioning strategy for career change
- Risk mitigation and success probability analysis
- Timeline and milestone planning

**🎯 TRANSITION ROADMAP:**
- Phase-wise transition strategy
- Skill development priorities
- Network building and industry immersion plan

Provide strategic guidance maximizing transition success while minimizing career risks.
"""
}
# Enhanced configuration for consistency
GENERATION_CONFIG = {
    "temperature": 0.15,  # Low temperature for maximum consistency
    "top_p": 0.8,
    "top_k": 40,
    "max_output_tokens": 4096,
    "stop_sequences": [],
}

# Model options optimized for consistency and performance
MODEL_FALLBACK_CHAIN = [
    "gemini-2.5-flash",                     # 🥇 PRIMARY - Latest Gemini 2.5
    "gemini-2.5-flash-lite-preview-06-17",  # 🥈 FALLBACK - Lite version
]
# Rate limiting decorator
def rate_limit(min_interval=2):
    def decorator(func):
        last_called = [0]

        @wraps(func)  # preserve the wrapped function's name and docstring
        def wrapper(*args, **kwargs):
            elapsed = time.time() - last_called[0]
            left_to_wait = min_interval - elapsed
            if left_to_wait > 0:
                time.sleep(left_to_wait)
            result = func(*args, **kwargs)
            last_called[0] = time.time()
            return result
        return wrapper
    return decorator
# Cache management functions
def init_cache():
    """Initialize SQLite cache for consistency"""
    conn = sqlite3.connect(CACHE_DIR / "ats_cache.db")
    cursor = conn.cursor()
    cursor.execute("""
        CREATE TABLE IF NOT EXISTS analysis_cache (
            hash_key TEXT PRIMARY KEY,
            response TEXT,
            timestamp DATETIME DEFAULT CURRENT_TIMESTAMP,
            model_used TEXT
        )
    """)
    conn.commit()
    conn.close()
def get_cached_response(consistency_hash, model_id):
    """Get cached response if available"""
    try:
        conn = sqlite3.connect(CACHE_DIR / "ats_cache.db")
        cursor = conn.cursor()
        cursor.execute(
            "SELECT response FROM analysis_cache WHERE hash_key = ? AND model_used = ?",
            (consistency_hash, model_id)
        )
        result = cursor.fetchone()
        conn.close()
        return result[0] if result else None
    except Exception:
        return None
def cache_response(consistency_hash, response, model_id):
    """Cache the response for future use"""
    try:
        conn = sqlite3.connect(CACHE_DIR / "ats_cache.db")
        cursor = conn.cursor()
        cursor.execute(
            "INSERT OR REPLACE INTO analysis_cache (hash_key, response, model_used) VALUES (?, ?, ?)",
            (consistency_hash, response, model_id)
        )
        conn.commit()
        conn.close()
    except Exception as e:
        st.warning(f"Cache save failed: {e}")
# Token estimation and content optimization
def estimate_tokens(text, model="gpt-3.5-turbo"):
    """Estimate token count for text"""
    if HAS_TIKTOKEN:
        try:
            encoding = tiktoken.encoding_for_model(model)
            return len(encoding.encode(text))
        except Exception:
            pass
    # Fallback estimation: roughly 4 characters per token
    return len(text) // 4
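
# Quick sanity check of the fallback heuristic (comment only): a 400-character
# string estimates to ~100 tokens when tiktoken is unavailable.
#   estimate_tokens("word " * 80)  # -> 100 via the 4-chars-per-token fallback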
def optimize_content_length(resume_text, job_description, max_resume_tokens=2000, max_job_tokens=1500):
    """Optimize content length to stay within token limits"""
    # Prioritize key sections in resume
    resume_sections = {
        'experience': [],
        'skills': [],
        'education': [],
        'summary': []
    }

    # Simple section detection
    lines = resume_text.split('\n')
    current_section = 'summary'
    for line in lines:
        line_lower = line.lower().strip()
        if any(keyword in line_lower for keyword in ['experience', 'work', 'employment']):
            current_section = 'experience'
        elif any(keyword in line_lower for keyword in ['skills', 'technical', 'competencies']):
            current_section = 'skills'
        elif any(keyword in line_lower for keyword in ['education', 'academic', 'degree']):
            current_section = 'education'
        if line.strip():
            resume_sections[current_section].append(line)

    # Build optimized resume content
    optimized_resume = []

    # Add summary (first 300 chars)
    if resume_sections['summary']:
        summary_text = '\n'.join(resume_sections['summary'][:5])
        optimized_resume.append(f"PROFESSIONAL SUMMARY:\n{summary_text[:300]}")

    # Add experience (prioritize recent)
    if resume_sections['experience']:
        exp_text = '\n'.join(resume_sections['experience'][:15])
        optimized_resume.append(f"WORK EXPERIENCE:\n{exp_text[:800]}")

    # Add skills
    if resume_sections['skills']:
        skills_text = '\n'.join(resume_sections['skills'][:8])
        optimized_resume.append(f"SKILLS:\n{skills_text[:400]}")

    # Add education
    if resume_sections['education']:
        edu_text = '\n'.join(resume_sections['education'][:5])
        optimized_resume.append(f"EDUCATION:\n{edu_text[:200]}")

    optimized_resume_text = '\n\n'.join(optimized_resume)

    # Ensure we're within token limits
    resume_tokens = estimate_tokens(optimized_resume_text)
    if resume_tokens > max_resume_tokens:
        # Truncate if still too long
        chars_per_token = len(optimized_resume_text) / resume_tokens
        max_chars = int(max_resume_tokens * chars_per_token)
        optimized_resume_text = optimized_resume_text[:max_chars] + "... [truncated]"

    # Optimize job description
    job_lines = job_description.split('\n')
    important_lines = []
    for line in job_lines:
        line_lower = line.lower()
        # Prioritize lines with key information
        if any(keyword in line_lower for keyword in [
            'require', 'must', 'essential', 'experience', 'skill',
            'qualification', 'bachelor', 'master', 'year', 'certification'
        ]):
            important_lines.append(line)
        elif line.strip() and len(important_lines) < 20:
            important_lines.append(line)

    optimized_job = '\n'.join(important_lines)

    # Ensure job description is within limits
    job_tokens = estimate_tokens(optimized_job)
    if job_tokens > max_job_tokens:
        chars_per_token = len(optimized_job) / job_tokens
        max_chars = int(max_job_tokens * chars_per_token)
        optimized_job = optimized_job[:max_chars] + "... [truncated]"

    return optimized_resume_text, optimized_job
def create_consistency_hash(resume_text, job_description, prompt_type):
    """Create a hash for identical inputs to ensure consistent outputs"""
    content = f"{resume_text[:1000]}{job_description[:1000]}{prompt_type}"
    return hashlib.md5(content.encode()).hexdigest()
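
# Determinism check (illustrative, not executed by the app): the same
# resume/job/prompt triple always maps to the same cache key, so repeat
# analyses are served from the SQLite cache instead of hitting the API.
# The variable names below are placeholders.
#   h1 = create_consistency_hash(resume, job, "evaluate")
#   h2 = create_consistency_hash(resume, job, "evaluate")
#   assert h1 == h2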
def get_available_model(force_primary=True):
    """Get the first available model from the fallback chain"""
    if force_primary:
        # Force use of primary model without testing
        return MODEL_FALLBACK_CHAIN[0]  # gemini-2.5-flash

    for model in MODEL_FALLBACK_CHAIN:
        try:
            st.info(f"🔄 Testing model: {model}")
            test_model = genai.GenerativeModel(
                model,
                safety_settings={
                    genai.types.HarmCategory.HARM_CATEGORY_HARASSMENT: genai.types.HarmBlockThreshold.BLOCK_NONE,
                    genai.types.HarmCategory.HARM_CATEGORY_HATE_SPEECH: genai.types.HarmBlockThreshold.BLOCK_NONE,
                    genai.types.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: genai.types.HarmBlockThreshold.BLOCK_NONE,
                    genai.types.HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: genai.types.HarmBlockThreshold.BLOCK_NONE,
                }
            )
            # Test with a simple prompt
            test_response = test_model.generate_content(
                "Say 'OK'",
                generation_config=genai.types.GenerationConfig(
                    temperature=0.1,
                    max_output_tokens=10
                )
            )
            if test_response.text:
                st.success(f"✅ {model} - Test successful!")
                return model
            else:
                st.warning(f"⚠️ {model} - No response text")
        except Exception as e:
            st.error(f"❌ {model} - Failed: {str(e)}")
            continue

    raise Exception("No available Gemini models found")
@rate_limit(min_interval=2)  # space out successive API calls using the decorator above
def get_consistent_gemini_response(model_id, prompt, pdf_content, input_text, consistency_hash):
    """Enhanced response generation with robust error handling"""
    try:
        model = genai.GenerativeModel(
            model_id,
            safety_settings={
                genai.types.HarmCategory.HARM_CATEGORY_HARASSMENT: genai.types.HarmBlockThreshold.BLOCK_NONE,
                genai.types.HarmCategory.HARM_CATEGORY_HATE_SPEECH: genai.types.HarmBlockThreshold.BLOCK_NONE,
                genai.types.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: genai.types.HarmBlockThreshold.BLOCK_NONE,
                genai.types.HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: genai.types.HarmBlockThreshold.BLOCK_NONE,
            }
        )

        # Add consistency instruction to prompt
        enhanced_prompt = f"""
{prompt}

**CONSISTENCY PROTOCOL ACTIVE:**
Session ID: {consistency_hash}
Evaluation Date: {datetime.now().strftime('%Y-%m-%d')}
Apply identical methodology and scoring for consistent results.
Use deterministic analysis patterns and standardized language.

**RESUME CONTENT:**
{pdf_content[:3000]}

**JOB DESCRIPTION:**
{input_text[:2000]}
"""

        response = model.generate_content(
            enhanced_prompt,
            generation_config=genai.types.GenerationConfig(**GENERATION_CONFIG)
        )

        # Enhanced error checking
        if hasattr(response, 'candidates') and response.candidates:
            candidate = response.candidates[0]

            # Check finish reason
            if hasattr(candidate, 'finish_reason'):
                if candidate.finish_reason == 1:  # STOP - normal completion
                    return response.text if hasattr(response, 'text') and response.text else "Analysis completed but no content returned."
                elif candidate.finish_reason == 2:  # MAX_TOKENS
                    return "⚠️ Analysis truncated due to length. Please try with a shorter resume or job description."
                elif candidate.finish_reason == 3:  # SAFETY
                    return "⚠️ Content filtered for safety. Please review your input for any potentially problematic content."
                elif candidate.finish_reason == 4:  # RECITATION
                    return "⚠️ Content blocked due to recitation concerns. Please try rephrasing your input."
                else:
                    return f"⚠️ Generation stopped with reason: {candidate.finish_reason}"

            # Try to get text anyway
            try:
                return response.text if response.text else "No analysis content generated."
            except Exception:
                return "Analysis completed but content could not be retrieved."

        return "No response candidates generated. Please try again."

    except Exception as e:
        st.error(f"⚠️ Analysis Error: {str(e)}")
        # Fallback with simpler model configuration
        try:
            simple_model = genai.GenerativeModel("gemini-pro")
            simple_prompt = f"Analyze this resume against the job description:\n\nResume: {pdf_content[:1000]}\n\nJob: {input_text[:1000]}"
            fallback_response = simple_model.generate_content(simple_prompt)
            return f"⚠️ Using fallback analysis:\n\n{fallback_response.text}"
        except Exception:
            return "Unable to complete analysis. Please check your API key, reduce content length, and try again."
def clean_extracted_text(text):
    """Clean and format extracted text"""
    # Remove excessive whitespace
    text = re.sub(r'\n\s*\n\s*\n', '\n\n', text)
    text = re.sub(r'[ \t]+', ' ', text)

    # Fix common extraction issues
    text = re.sub(r'([a-z])([A-Z])', r'\1 \2', text)  # Add space before capitals
    text = re.sub(r'(\w)([•·▪▫])', r'\1 \2', text)    # Space before bullets
    text = re.sub(r'([•·▪▫])(\w)', r'\1 \2', text)    # Space after bullets

    # Remove page markers
    text = re.sub(r'--- Page \d+ ---', '', text)

    # Normalize line endings
    text = text.replace('\r\n', '\n').replace('\r', '\n')

    # Remove empty lines at start and end
    text = text.strip()
    return text
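
# Example of the fixes above (comment only): run-together words and unspaced
# bullets from PDF extraction come out readable, e.g.
#   clean_extracted_text("JohnDoe•Python")  # -> "John Doe • Python"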
def enhanced_pdf_processing(pdf_docs):
    """Enhanced PDF processing with better text extraction and formatting"""
    text = ""
    for doc in pdf_docs:
        try:
            if doc.name.endswith(".pdf"):
                # Save uploaded file temporarily
                with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
                    tmp_file.write(doc.getvalue())
                    tmp_path = tmp_file.name

                try:
                    # Try multiple extraction methods
                    pdf_reader = PdfReader(tmp_path)
                    extracted_text = ""
                    for page_num, page in enumerate(pdf_reader.pages):
                        page_text = page.extract_text() or ""  # extract_text() can return None
                        # Clean up common PDF extraction issues
                        page_text = re.sub(r'\s+', ' ', page_text)  # Normalize whitespace
                        page_text = re.sub(r'([a-z])([A-Z])', r'\1 \2', page_text)  # Add spaces between words
                        extracted_text += f"\n--- Page {page_num + 1} ---\n{page_text}\n"

                    # If extraction is poor, try alternative method
                    if len(extracted_text.strip()) < 100 and HAS_PDFPLUMBER:
                        try:
                            with pdfplumber.open(tmp_path) as pdf:
                                for page in pdf.pages:
                                    page_text = page.extract_text()
                                    if page_text:
                                        extracted_text += page_text + "\n"
                        except Exception:
                            pass

                    text += extracted_text
                finally:
                    # Clean up temporary file
                    os.unlink(tmp_path)

            elif doc.name.endswith(".docx") and HAS_DOCX:
                try:
                    # Save uploaded file temporarily
                    with tempfile.NamedTemporaryFile(delete=False, suffix=".docx") as tmp_file:
                        tmp_file.write(doc.getvalue())
                        tmp_path = tmp_file.name

                    try:
                        doc_reader = docx.Document(tmp_path)
                        # Extract paragraphs
                        for para in doc_reader.paragraphs:
                            if para.text.strip():
                                text += para.text + "\n"
                        # Extract tables
                        for table in doc_reader.tables:
                            for row in table.rows:
                                row_text = " | ".join([cell.text.strip() for cell in row.cells])
                                if row_text.strip():
                                    text += row_text + "\n"
                    finally:
                        os.unlink(tmp_path)
                except Exception as e:
                    st.error(f"📄 Error processing DOCX {doc.name}: {str(e)}")
        except Exception as e:
            st.error(f"📄 Error processing {doc.name}: {str(e)}")
            continue

    # Clean and format the extracted text
    text = clean_extracted_text(text)
    return text
def validate_resume_content(text):
    """Validate that the extracted text looks like a resume"""
    text_lower = text.lower()

    # Check for common resume indicators
    resume_indicators = [
        'experience', 'education', 'skills', 'work', 'employment',
        'university', 'college', 'degree', 'certification', 'project',
        'email', 'phone', 'address', 'linkedin'
    ]
    found_indicators = sum(1 for indicator in resume_indicators if indicator in text_lower)

    if found_indicators < 3:
        st.warning("⚠️ The uploaded file may not be a resume. Please verify the content.")
        return False
    if len(text.strip()) < 200:
        st.warning("⚠️ The extracted text seems too short. Please check your file.")
        return False
    return True
def validate_configuration():
    """Validate system configuration"""
    issues = []

    # Check API key
    if not os.getenv("GOOGLE_API_KEY") and not st.session_state.get("api_key"):
        issues.append("❌ Google API Key not configured")

    # Check optional packages
    if not HAS_DOCX:
        issues.append("⚠️ Optional: Install python-docx for better DOCX support (pip install python-docx)")
    if not HAS_PDFPLUMBER:
        issues.append("⚠️ Optional: Install pdfplumber for better PDF extraction (pip install pdfplumber)")
    if not HAS_TIKTOKEN:
        issues.append("⚠️ Optional: Install tiktoken for better token estimation (pip install tiktoken)")
    return issues
def perform_enhanced_analysis(resume_text, job_description, analysis_type, custom_query=None, force_primary=True):
    """Main analysis function with all improvements"""
    # Initialize cache
    init_cache()

    # Optimize content length
    optimized_resume, optimized_job = optimize_content_length(resume_text, job_description)

    # Create consistency hash
    consistency_hash = create_consistency_hash(optimized_resume, optimized_job, analysis_type)

    # Try to get from cache first
    model_id = get_available_model(force_primary=force_primary)
    cached_response = get_cached_response(consistency_hash, model_id)
    if cached_response:
        st.success("⚡ Retrieved from cache for consistency")
        return cached_response, consistency_hash

    # Select prompt
    prompt_map = {
        "evaluate": "evaluate_resume",
        "improve": "improve_skills",
        "keywords": "missing_keywords",
        "match": "percentage_match",
        "executive": "executive_assessment",
        "transition": "career_transition",
        "custom": "answer_query"
    }
    base_prompt = SPECIALIZED_PROMPTS[prompt_map.get(analysis_type, "evaluate_resume")]
    if analysis_type == "custom" and custom_query:
        base_prompt = f"{base_prompt}\n\nSPECIFIC QUERY: {custom_query}"

    # Generate response
    response = get_consistent_gemini_response(
        model_id, base_prompt, optimized_resume, optimized_job, consistency_hash
    )

    # Cache the response
    if response and not response.startswith("⚠️"):
        cache_response(consistency_hash, response, model_id)

    return response, consistency_hash
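
# Standalone usage sketch (comment only; assumes genai.configure(api_key=...)
# has already run and that `resume_text` / `jd_text` are placeholder strings
# holding extracted content):
#   response, key = perform_enhanced_analysis(resume_text, jd_text, "match")
#   print(key, response[:200])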
# Streamlit App Configuration
st.set_page_config(
    page_title="Smart ATS System",
    page_icon="🚀",
    layout="wide",
    initial_sidebar_state="expanded"
)

# Custom CSS for professional UI
st.markdown("""
<style>
    .main-header {
        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
        padding: 2rem;
        border-radius: 15px;
        text-align: center;
        color: white;
        margin-bottom: 2rem;
        box-shadow: 0 10px 30px rgba(0,0,0,0.1);
    }
    .feature-card {
        background: linear-gradient(135deg, #f8f9fa 0%, #e9ecef 100%);
        padding: 1.5rem;
        border-radius: 12px;
        border-left: 5px solid #667eea;
        margin: 1rem 0;
        box-shadow: 0 5px 15px rgba(0,0,0,0.08);
    }
    .metric-container {
        background: linear-gradient(135deg, #28a745 0%, #20c997 100%);
        color: white;
        padding: 1.5rem;
        border-radius: 12px;
        text-align: center;
        margin: 1rem 0;
    }
    .consistency-badge {
        background: #ffc107;
        color: #212529;
        padding: 0.5rem 1rem;
        border-radius: 20px;
        font-weight: bold;
        display: inline-block;
        margin: 0.5rem 0;
    }
    .stButton > button {
        height: 3rem;
        font-weight: 600;
    }
</style>
""", unsafe_allow_html=True)
# Main Header
st.markdown("""
<div class="main-header">
    <h1>🚀 Smart ATS System</h1>
    <p>AI-Powered Resume Analysis with Guaranteed Consistency | Global Multi-Domain Support</p>
    <div class="consistency-badge">✅ Consistent Results Guaranteed</div>
</div>
""", unsafe_allow_html=True)
# Sidebar Configuration
with st.sidebar:
    st.markdown("### 🔑 Configuration")
    st.markdown("[Get your Google API Key](https://aistudio.google.com/app/apikey)")
    api_key = st.text_input("🔑 Google API Key", type="password", help="Your Gemini API key for AI analysis")
    st.session_state["api_key"] = api_key

    # Model selection option
    force_primary = st.checkbox("🎯 Force Primary Model", value=True, help="Use gemini-2.5-flash directly without testing")
    st.session_state["force_primary"] = force_primary

    if api_key:
        try:
            genai.configure(api_key=api_key)
            model_id = get_available_model(force_primary=force_primary)
            st.success(f"✅ Connected to {model_id}")
        except Exception as e:
            st.error(f"❌ API Key Error: {str(e)}")

    st.markdown("### 📄 Document Upload")
    uploaded_files = st.file_uploader(
        "📄 Upload Resume (PDF/DOCX)",
        type=["pdf", "docx"],
        accept_multiple_files=True,
        help="Upload your resume in PDF or DOCX format"
    )
    if uploaded_files:
        st.success(f"✅ {len(uploaded_files)} file(s) uploaded successfully!")
# Main Interface
st.markdown("### 📝 Job Description Input")
input_text = st.text_area(
    "Paste the complete job description here:",
    height=150,
    placeholder="Enter the full job description including requirements, responsibilities, and qualifications...",
    help="The more detailed the job description, the more accurate the analysis"
)

# Analysis Buttons
st.markdown("### 🎯 Choose Analysis Type")
col1, col2, col3 = st.columns(3)
with col1:
    evaluate_btn = st.button("📋 Complete Evaluation", use_container_width=True)
    improve_btn = st.button("📈 Skill Enhancement", use_container_width=True)
with col2:
    keywords_btn = st.button("🔍 Keyword Analysis", use_container_width=True)
    match_btn = st.button("🎯 Match Percentage", use_container_width=True)
with col3:
    executive_btn = st.button("🏆 Executive Assessment", use_container_width=True)
    transition_btn = st.button("🔄 Career Transition", use_container_width=True)

# Custom Query Section
with st.expander("💬 Ask Custom Question"):
    custom_query = st.text_input("Ask any specific question about the resume or career advice:")
    query_btn = st.button("🤖 Get Expert Answer")
# Analysis Logic
analysis_triggered = any([evaluate_btn, improve_btn, keywords_btn, match_btn, executive_btn, transition_btn, query_btn])

if analysis_triggered:
    # Validation
    errors = []
    if not api_key:
        errors.append("🔑 Please enter your Google API Key")
    if not uploaded_files:
        errors.append("📄 Please upload your resume")
    if not input_text:
        errors.append("📝 Please paste the job description")

    if errors:
        for error in errors:
            st.error(error)
    else:
        # Process analysis
        with st.spinner("🔄 Analyzing with advanced AI algorithms..."):
            pdf_content = enhanced_pdf_processing(uploaded_files)

            # Validate content
            if not validate_resume_content(pdf_content):
                st.warning("⚠️ Please verify that your uploaded file is a valid resume.")

            # Determine analysis type
            if evaluate_btn:
                analysis_type = "evaluate"
            elif improve_btn:
                analysis_type = "improve"
            elif keywords_btn:
                analysis_type = "keywords"
            elif match_btn:
                analysis_type = "match"
            elif executive_btn:
                analysis_type = "executive"
            elif transition_btn:
                analysis_type = "transition"
            elif query_btn:
                analysis_type = "custom"

            try:
                # Get force_primary setting
                force_primary_setting = st.session_state.get("force_primary", True)

                # Perform analysis
                response, consistency_hash = perform_enhanced_analysis(
                    pdf_content, input_text, analysis_type, custom_query, force_primary_setting
                )

                # Display results
                st.markdown("## 📊 Analysis Results")

                # Show metadata
                col1, col2 = st.columns(2)
                with col1:
                    st.markdown(f"**Consistency ID:** `{consistency_hash[:8]}`")
                with col2:
                    st.markdown(f"**Analysis Type:** {analysis_type.title()}")

                st.markdown("---")
                st.markdown(response)

                # Show content optimization info
                if st.checkbox("📊 Show Content Optimization Details"):
                    optimized_resume, optimized_job = optimize_content_length(pdf_content, input_text)
                    col1, col2 = st.columns(2)
                    with col1:
                        st.metric("Resume Tokens", estimate_tokens(optimized_resume))
                        st.metric("Original Resume Length", len(pdf_content))
                    with col2:
                        st.metric("Job Description Tokens", estimate_tokens(optimized_job))
                        st.metric("Original Job Length", len(input_text))
            except Exception as e:
                st.error(f"Analysis failed: {str(e)}")
                st.info("Please try again with a shorter document or check your API key.")
# Footer
st.markdown("---")
st.markdown("""
<div style="text-align: center; color: #666;">
    <p>🚀 Smart ATS System | Powered by Advanced AI | Consistent • Reliable • Universal</p>
    <p>Built with cutting-edge strategies for maximum ATS compatibility and career success</p>
</div>
""", unsafe_allow_html=True)
# Initialize cache on startup
if __name__ == "__main__":
    init_cache()