## Ultimate Smart ATS System - June 2025 Edition
## Incorporates Latest AI Strategies with Deterministic Consistency
import streamlit as st
import os
import re
import tempfile
import sqlite3
import hashlib
import time
from datetime import datetime
from pathlib import Path
from functools import wraps
from PyPDF2 import PdfReader
import google.generativeai as genai
from dotenv import load_dotenv
# Optional imports with fallbacks
try:
import pdfplumber
HAS_PDFPLUMBER = True
except ImportError:
HAS_PDFPLUMBER = False
try:
import docx
HAS_DOCX = True
except ImportError:
HAS_DOCX = False
try:
import tiktoken
HAS_TIKTOKEN = True
except ImportError:
HAS_TIKTOKEN = False
# Loading the .env keys
load_dotenv()
# Create cache directory
CACHE_DIR = Path("ats_cache")
CACHE_DIR.mkdir(exist_ok=True)
# MASTER UNIVERSAL SYSTEM PROMPT - Designed for Maximum Consistency & Global Applicability
UNIVERSAL_MASTER_PROMPT = """
You are the ULTIMATE ATS OPTIMIZATION ENGINE 3.0 - A state-of-the-art AI system designed to provide CONSISTENT, PRECISE, and GLOBALLY APPLICABLE resume analysis across ALL industries, roles, and experience levels.
**CORE OPERATING PRINCIPLES (June 2025 Standards):**
**CONSISTENCY PROTOCOL:**
- Always follow the EXACT same evaluation methodology for identical inputs
- Use standardized scoring matrices to ensure reproducible results
- Apply deterministic analysis patterns to eliminate variability
- Maintain consistent terminology and assessment criteria across all evaluations
**UNIVERSAL EVALUATION FRAMEWORK:**
**PHASE 1: TECHNICAL PARSING & KEYWORD OPTIMIZATION (25 Points)**
1. **ATS Compatibility Analysis:**
- Format compliance score (reverse-chronological preferred)
- Keyword density and natural integration assessment
- Critical missing terms identification (high-impact keywords only)
- Semantic relevance evaluation
2. **Section-by-Section Parsing:**
- Contact Information: Completeness and professionalism
- Professional Summary: Impact and relevance
- Experience: Achievement quantification and progression
- Skills: Technical and soft skills alignment
- Education: Relevance and credential verification
**PHASE 2: COMPETENCY MATCHING ANALYSIS (35 Points)**
1. **Hard Skills Assessment (20 points):**
- Technical competencies directly matching job requirements
- Tools, software, and platform expertise
- Industry-specific knowledge and certifications
- Quantifiable achievements and metrics
2. **Soft Skills Evaluation (15 points):**
- Leadership and communication indicators
- Problem-solving and analytical thinking evidence
- Adaptability and learning agility markers
- Team collaboration and project management skills
**PHASE 3: EXPERIENCE RELEVANCE SCORING (25 Points)**
1. **Direct Experience Match (15 points):**
- Same industry and role experience
- Progressive responsibility growth
- Relevant project complexity and scale
2. **Transferable Experience (10 points):**
- Adjacent industry or functional experience
- Cross-domain skills applicability
- Leadership and management experience
**PHASE 4: CULTURAL & GROWTH POTENTIAL (15 Points)**
1. **Growth Trajectory Analysis:**
- Career progression patterns
- Continuous learning evidence
- Innovation and initiative indicators
2. **Cultural Alignment Markers:**
- Values demonstration through experience
- Collaboration and team success stories
- Adaptability to organizational change
**STANDARDIZED OUTPUT STRUCTURE:**
Always provide results in this EXACT format for consistency:
**🎯 OVERALL MATCH SCORE: [XX/100]**
**📊 DETAILED BREAKDOWN:**
- Technical Parsing: [XX/25]
- Competency Matching: [XX/35]
- Experience Relevance: [XX/25]
- Growth Potential: [XX/15]
**✅ TOP 5 STRENGTHS:**
1. [Specific strength with evidence]
2. [Specific strength with evidence]
3. [Specific strength with evidence]
4. [Specific strength with evidence]
5. [Specific strength with evidence]
**🔧 TOP 3 IMPROVEMENT AREAS:**
1. [Specific area with actionable suggestion]
2. [Specific area with actionable suggestion]
3. [Specific area with actionable suggestion]
**🔍 CRITICAL MISSING KEYWORDS:**
- [High-impact keyword 1]
- [High-impact keyword 2]
- [High-impact keyword 3]
**💡 STRATEGIC RECOMMENDATIONS:**
[3-4 specific, actionable recommendations]
**🏆 FINAL VERDICT:**
[EXCEPTIONAL 90-100 | STRONG 75-89 | GOOD 60-74 | DEVELOPING 45-59 | NEEDS WORK <45]
**CONSISTENCY GUARANTEES:**
- Same resume + same job description = identical analysis (±2 points variation max)
- Standardized language and terminology across all evaluations
- Reproducible scoring methodology regardless of domain
- Time-consistent results (same analysis today and tomorrow)
"""
# Specialized prompts that extend the master prompt for specific use cases
SPECIALIZED_PROMPTS = {
"evaluate_resume": f"""
{UNIVERSAL_MASTER_PROMPT}
**SPECIFIC TASK: COMPREHENSIVE RESUME EVALUATION**
Apply the Universal Evaluation Framework above to provide a complete assessment.
Focus on overall candidacy evaluation with balanced perspective on strengths and development areas.
Maintain professional tone suitable for HR professionals and hiring managers.
""",
"improve_skills": f"""
{UNIVERSAL_MASTER_PROMPT}
**SPECIFIC TASK: SKILL ENHANCEMENT STRATEGY**
After completing the standard evaluation, provide additional guidance:
**📈 SKILL DEVELOPMENT ROADMAP:**
- **Immediate Actions (0-3 months):** Quick wins and foundational improvements
- **Short-term Goals (3-12 months):** Structured learning and certification paths
- **Long-term Vision (1-3 years):** Strategic career advancement opportunities
**📚 LEARNING RESOURCES:**
- Recommended courses, certifications, and training programs
- Industry conferences and networking opportunities
- Practical projects and portfolio development suggestions
Focus on actionable, measurable improvement strategies with clear timelines.
""",
"missing_keywords": f"""
{UNIVERSAL_MASTER_PROMPT}
**SPECIFIC TASK: ATS KEYWORD OPTIMIZATION**
After completing the standard evaluation, provide enhanced keyword analysis:
**🔍 ADVANCED KEYWORD ANALYSIS:**
- **CRITICAL MISSING (High Impact):** Essential terms significantly affecting ATS ranking
- **IMPORTANT ADDITIONS (Medium Impact):** Valuable terms improving visibility
- **OPTIMIZATION OPPORTUNITIES (Low Impact):** Supplementary terms for comprehensive coverage
**📝 INTEGRATION STRATEGY:**
- Specific resume sections for keyword placement
- Natural integration techniques avoiding keyword stuffing
- Industry-appropriate phrasing and terminology
**🤖 ATS COMPATIBILITY SCORE:** [Detailed breakdown of parsing efficiency]
""",
"percentage_match": f"""
{UNIVERSAL_MASTER_PROMPT}
**SPECIFIC TASK: PRECISE MATCHING ANALYSIS**
Provide the standard evaluation with enhanced quantitative focus:
**📊 DETAILED SCORING BREAKDOWN:**
Present exact point allocation for each category with clear justification.
Include competitive benchmarking and market positioning analysis.
Provide specific improvement strategies for a 10-15% score increase.
**🎯 MATCH PERCENTAGE: [XX%]**
Tier Classification with detailed rationale and next steps.
""",
"answer_query": f"""
{UNIVERSAL_MASTER_PROMPT}
**SPECIFIC TASK: EXPERT CONSULTATION**
Apply domain expertise to answer the specific query while considering:
- Resume content and job description context
- Industry best practices and current market trends
- Practical, actionable guidance
- Evidence-based recommendations
Provide thorough, well-researched responses with specific examples and multiple solution approaches when applicable.
""",
"executive_assessment": f"""
{UNIVERSAL_MASTER_PROMPT}
**SPECIFIC TASK: EXECUTIVE-LEVEL EVALUATION**
Apply enhanced criteria for senior leadership positions:
**📋 EXECUTIVE COMPETENCY FRAMEWORK:**
- Strategic thinking and vision development
- Change management and transformation leadership
- Financial acumen and business impact
- Board readiness and governance experience
**📊 LEADERSHIP IMPACT ANALYSIS:**
- Quantifiable business results and achievements
- Market expansion and competitive positioning
- Organizational culture and talent development
- Crisis leadership and resilience
Provide insights suitable for C-suite and board-level discussions.
""",
"career_transition": f"""
{UNIVERSAL_MASTER_PROMPT}
**SPECIFIC TASK: CAREER PIVOT ANALYSIS**
Evaluate career change feasibility with:
**🔄 TRANSITION ASSESSMENT:**
- Transferable skills mapping across industries
- Market positioning strategy for career change
- Risk mitigation and success probability analysis
- Timeline and milestone planning
**🎯 TRANSITION ROADMAP:**
- Phase-wise transition strategy
- Skill development priorities
- Network building and industry immersion plan
Provide strategic guidance maximizing transition success while minimizing career risks.
"""
}
# Enhanced configuration for consistency
GENERATION_CONFIG = {
"temperature": 0.15, # Low temperature for maximum consistency
"top_p": 0.8,
"top_k": 40,
"max_output_tokens": 4096,
"stop_sequences": [],
}
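# Note: a low temperature narrows sampling but does not make the model fully
# deterministic; the SQLite response cache defined below is what actually
# guarantees byte-identical results for repeated identical inputs.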
# Model options optimized for consistency and performance
MODEL_FALLBACK_CHAIN = [
"gemini-2.5-flash", # π₯ PRIMARY - Latest Gemini 2.5
"gemini-2.5-flash-lite-preview-06-17" # π₯ FALLBACK - Lite version
]
# Rate limiting decorator
def rate_limit(min_interval=2):
def decorator(func):
last_called = [0]
@wraps(func)
def wrapper(*args, **kwargs):
elapsed = time.time() - last_called[0]
left_to_wait = min_interval - elapsed
if left_to_wait > 0:
time.sleep(left_to_wait)
result = func(*args, **kwargs)
last_called[0] = time.time()
return result
return wrapper
return decorator
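# Usage sketch (hypothetical, never called by the app): back-to-back calls to a
# decorated function are spaced at least `min_interval` seconds apart, because
# the wrapper sleeps off any remaining interval before invoking the function.
@rate_limit(min_interval=2)
def _rate_limit_demo():
    """Two consecutive calls to this function return timestamps >= 2s apart."""
    return time.time()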
# Cache management functions
def init_cache():
"""Initialize SQLite cache for consistency"""
conn = sqlite3.connect(CACHE_DIR / "ats_cache.db")
cursor = conn.cursor()
cursor.execute("""
CREATE TABLE IF NOT EXISTS analysis_cache (
hash_key TEXT PRIMARY KEY,
response TEXT,
timestamp DATETIME DEFAULT CURRENT_TIMESTAMP,
model_used TEXT
)
""")
conn.commit()
conn.close()
def get_cached_response(consistency_hash, model_id):
"""Get cached response if available"""
try:
conn = sqlite3.connect(CACHE_DIR / "ats_cache.db")
cursor = conn.cursor()
cursor.execute(
"SELECT response FROM analysis_cache WHERE hash_key = ? AND model_used = ?",
(consistency_hash, model_id)
)
result = cursor.fetchone()
conn.close()
return result[0] if result else None
    except Exception:
        return None
def cache_response(consistency_hash, response, model_id):
"""Cache the response for future use"""
try:
conn = sqlite3.connect(CACHE_DIR / "ats_cache.db")
cursor = conn.cursor()
cursor.execute(
"INSERT OR REPLACE INTO analysis_cache (hash_key, response, model_used) VALUES (?, ?, ?)",
(consistency_hash, response, model_id)
)
conn.commit()
conn.close()
except Exception as e:
st.warning(f"Cache save failed: {e}")
# Token estimation and content optimization
def estimate_tokens(text, model="gpt-3.5-turbo"):
"""Estimate token count for text"""
if HAS_TIKTOKEN:
try:
encoding = tiktoken.encoding_for_model(model)
return len(encoding.encode(text))
        except Exception:
            pass
# Fallback estimation: roughly 4 characters per token
return len(text) // 4
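# Quick sanity check of the fallback heuristic (illustrative): when tiktoken is
# unavailable, a 400-character string is estimated at 400 // 4 = 100 tokens.
# For Gemini this is only an approximation either way; the tiktoken path is
# calibrated for OpenAI models and is itself a stand-in.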
def optimize_content_length(resume_text, job_description, max_resume_tokens=2000, max_job_tokens=1500):
"""Optimize content length to stay within token limits"""
# Prioritize key sections in resume
resume_sections = {
'experience': [],
'skills': [],
'education': [],
'summary': []
}
# Simple section detection
lines = resume_text.split('\n')
current_section = 'summary'
for line in lines:
line_lower = line.lower().strip()
if any(keyword in line_lower for keyword in ['experience', 'work', 'employment']):
current_section = 'experience'
elif any(keyword in line_lower for keyword in ['skills', 'technical', 'competencies']):
current_section = 'skills'
elif any(keyword in line_lower for keyword in ['education', 'academic', 'degree']):
current_section = 'education'
if line.strip():
resume_sections[current_section].append(line)
# Build optimized resume content
optimized_resume = []
# Add summary (first 300 chars)
if resume_sections['summary']:
summary_text = '\n'.join(resume_sections['summary'][:5])
optimized_resume.append(f"PROFESSIONAL SUMMARY:\n{summary_text[:300]}")
# Add experience (prioritize recent)
if resume_sections['experience']:
exp_text = '\n'.join(resume_sections['experience'][:15])
optimized_resume.append(f"WORK EXPERIENCE:\n{exp_text[:800]}")
# Add skills
if resume_sections['skills']:
skills_text = '\n'.join(resume_sections['skills'][:8])
optimized_resume.append(f"SKILLS:\n{skills_text[:400]}")
# Add education
if resume_sections['education']:
edu_text = '\n'.join(resume_sections['education'][:5])
optimized_resume.append(f"EDUCATION:\n{edu_text[:200]}")
optimized_resume_text = '\n\n'.join(optimized_resume)
# Ensure we're within token limits
resume_tokens = estimate_tokens(optimized_resume_text)
if resume_tokens > max_resume_tokens:
# Truncate if still too long
chars_per_token = len(optimized_resume_text) / resume_tokens
max_chars = int(max_resume_tokens * chars_per_token)
optimized_resume_text = optimized_resume_text[:max_chars] + "... [truncated]"
# Optimize job description
job_lines = job_description.split('\n')
important_lines = []
for line in job_lines:
line_lower = line.lower()
# Prioritize lines with key information
if any(keyword in line_lower for keyword in [
'require', 'must', 'essential', 'experience', 'skill',
'qualification', 'bachelor', 'master', 'year', 'certification'
]):
important_lines.append(line)
elif line.strip() and len(important_lines) < 20:
important_lines.append(line)
optimized_job = '\n'.join(important_lines)
# Ensure job description is within limits
job_tokens = estimate_tokens(optimized_job)
if job_tokens > max_job_tokens:
chars_per_token = len(optimized_job) / job_tokens
max_chars = int(max_job_tokens * chars_per_token)
optimized_job = optimized_job[:max_chars] + "... [truncated]"
return optimized_resume_text, optimized_job
def create_consistency_hash(resume_text, job_description, prompt_type):
"""Create a hash for identical inputs to ensure consistent outputs"""
content = f"{resume_text[:1000]}{job_description[:1000]}{prompt_type}"
return hashlib.md5(content.encode()).hexdigest()
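# Property the cache relies on (illustrative): identical inputs always produce
# the same key, e.g.
#   create_consistency_hash(r, j, "evaluate") == create_consistency_hash(r, j, "evaluate")
# Caveat: only the first 1000 characters of each text participate, so edits
# beyond that boundary will not invalidate a cached analysis.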
def get_available_model(force_primary=True):
"""Get the first available model from the fallback chain"""
if force_primary:
# Force use of primary model without testing
return MODEL_FALLBACK_CHAIN[0] # gemini-2.5-flash
for model in MODEL_FALLBACK_CHAIN:
try:
st.info(f"π Testing model: {model}")
test_model = genai.GenerativeModel(
model,
safety_settings={
genai.types.HarmCategory.HARM_CATEGORY_HARASSMENT: genai.types.HarmBlockThreshold.BLOCK_NONE,
genai.types.HarmCategory.HARM_CATEGORY_HATE_SPEECH: genai.types.HarmBlockThreshold.BLOCK_NONE,
genai.types.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: genai.types.HarmBlockThreshold.BLOCK_NONE,
genai.types.HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: genai.types.HarmBlockThreshold.BLOCK_NONE,
}
)
# Test with a simple prompt
test_response = test_model.generate_content(
"Say 'OK'",
generation_config=genai.types.GenerationConfig(
temperature=0.1,
max_output_tokens=10
)
)
            if test_response.text:
                st.success(f"✅ {model} - Test successful!")
                return model
            else:
                st.warning(f"⚠️ {model} - No response text")
except Exception as e:
st.error(f"β {model} - Failed: {str(e)}")
continue
raise Exception("No available Gemini models found")
@rate_limit(min_interval=2)
def get_consistent_gemini_response(model_id, prompt, pdf_content, input_text, consistency_hash):
"""Enhanced response generation with robust error handling"""
try:
model = genai.GenerativeModel(
model_id,
safety_settings={
genai.types.HarmCategory.HARM_CATEGORY_HARASSMENT: genai.types.HarmBlockThreshold.BLOCK_NONE,
genai.types.HarmCategory.HARM_CATEGORY_HATE_SPEECH: genai.types.HarmBlockThreshold.BLOCK_NONE,
genai.types.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: genai.types.HarmBlockThreshold.BLOCK_NONE,
genai.types.HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: genai.types.HarmBlockThreshold.BLOCK_NONE,
}
)
# Add consistency instruction to prompt
enhanced_prompt = f"""
{prompt}
**CONSISTENCY PROTOCOL ACTIVE:**
Session ID: {consistency_hash}
Evaluation Date: {datetime.now().strftime('%Y-%m-%d')}
Apply identical methodology and scoring for consistent results.
Use deterministic analysis patterns and standardized language.
**RESUME CONTENT:**
{pdf_content[:3000]}
**JOB DESCRIPTION:**
{input_text[:2000]}
"""
response = model.generate_content(
enhanced_prompt,
generation_config=genai.types.GenerationConfig(**GENERATION_CONFIG)
)
# Enhanced error checking
if hasattr(response, 'candidates') and response.candidates:
candidate = response.candidates[0]
# Check finish reason
if hasattr(candidate, 'finish_reason'):
                if candidate.finish_reason == 1:  # STOP - Normal completion
                    return response.text if hasattr(response, 'text') and response.text else "Analysis completed but no content returned."
                elif candidate.finish_reason == 2:  # MAX_TOKENS
                    return "⚠️ Analysis truncated due to length. Please try with a shorter resume or job description."
                elif candidate.finish_reason == 3:  # SAFETY
                    return "⚠️ Content filtered for safety. Please review your input for any potentially problematic content."
                elif candidate.finish_reason == 4:  # RECITATION
                    return "⚠️ Content blocked due to recitation concerns. Please try rephrasing your input."
                else:
                    return f"⚠️ Generation stopped with reason: {candidate.finish_reason}"
# Try to get text anyway
try:
return response.text if response.text else "No analysis content generated."
            except Exception:
                return "Analysis completed but content could not be retrieved."
return "No response candidates generated. Please try again."
    except Exception as e:
        st.error(f"⚠️ Analysis Error: {str(e)}")
        # Fallback: retry once with the lite model from the chain and a minimal prompt
        try:
            simple_model = genai.GenerativeModel(MODEL_FALLBACK_CHAIN[-1])
            simple_prompt = f"Analyze this resume against the job description:\n\nResume: {pdf_content[:1000]}\n\nJob: {input_text[:1000]}"
            fallback_response = simple_model.generate_content(simple_prompt)
            return f"⚠️ Using fallback analysis:\n\n{fallback_response.text}"
        except Exception:
            return "Unable to complete analysis. Please check your API key, reduce content length, and try again."
def clean_extracted_text(text):
"""Clean and format extracted text"""
# Remove excessive whitespace
text = re.sub(r'\n\s*\n\s*\n', '\n\n', text)
text = re.sub(r'[ \t]+', ' ', text)
# Fix common extraction issues
text = re.sub(r'([a-z])([A-Z])', r'\1 \2', text) # Add space before capitals
    text = re.sub(r'(\w)([•·▪▫])', r'\1 \2', text)  # Space before bullets
    text = re.sub(r'([•·▪▫])(\w)', r'\1 \2', text)  # Space after bullets
# Remove page markers
text = re.sub(r'--- Page \d+ ---', '', text)
# Normalize line endings
text = text.replace('\r\n', '\n').replace('\r', '\n')
# Remove empty lines at start and end
text = text.strip()
return text
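# Example of the cleanup above (illustrative): a typical PDF-extraction artifact
#   "JohnDoe•Python Developer --- Page 1 ---"
# becomes
#   "John Doe • Python Developer"
# (space at the case boundary, spaces around the bullet, page marker removed).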
def enhanced_pdf_processing(pdf_docs):
"""Enhanced PDF processing with better text extraction and formatting"""
text = ""
for doc in pdf_docs:
try:
if doc.name.endswith(".pdf"):
# Save uploaded file temporarily
with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
tmp_file.write(doc.getvalue())
tmp_path = tmp_file.name
try:
# Try multiple extraction methods
pdf_reader = PdfReader(tmp_path)
extracted_text = ""
for page_num, page in enumerate(pdf_reader.pages):
page_text = page.extract_text()
# Clean up common PDF extraction issues
page_text = re.sub(r'\s+', ' ', page_text) # Normalize whitespace
page_text = re.sub(r'([a-z])([A-Z])', r'\1 \2', page_text) # Add spaces between words
extracted_text += f"\n--- Page {page_num + 1} ---\n{page_text}\n"
# If extraction is poor, try alternative method
if len(extracted_text.strip()) < 100 and HAS_PDFPLUMBER:
try:
with pdfplumber.open(tmp_path) as pdf:
for page in pdf.pages:
page_text = page.extract_text()
if page_text:
extracted_text += page_text + "\n"
except Exception:
pass
text += extracted_text
finally:
# Clean up temporary file
os.unlink(tmp_path)
elif doc.name.endswith(".docx") and HAS_DOCX:
try:
# Save uploaded file temporarily
with tempfile.NamedTemporaryFile(delete=False, suffix=".docx") as tmp_file:
tmp_file.write(doc.getvalue())
tmp_path = tmp_file.name
try:
doc_reader = docx.Document(tmp_path)
# Extract paragraphs
for para in doc_reader.paragraphs:
if para.text.strip():
text += para.text + "\n"
# Extract tables
for table in doc_reader.tables:
for row in table.rows:
row_text = " | ".join([cell.text.strip() for cell in row.cells])
if row_text.strip():
text += row_text + "\n"
finally:
os.unlink(tmp_path)
except Exception as e:
st.error(f"π Error processing DOCX {doc.name}: {str(e)}")
except Exception as e:
st.error(f"π Error processing {doc.name}: {str(e)}")
continue
# Clean and format the extracted text
text = clean_extracted_text(text)
return text
def validate_resume_content(text):
"""Validate that the extracted text looks like a resume"""
text_lower = text.lower()
# Check for common resume indicators
resume_indicators = [
'experience', 'education', 'skills', 'work', 'employment',
'university', 'college', 'degree', 'certification', 'project',
'email', 'phone', 'address', 'linkedin'
]
found_indicators = sum(1 for indicator in resume_indicators if indicator in text_lower)
    if found_indicators < 3:
        st.warning("⚠️ The uploaded file may not be a resume. Please verify the content.")
        return False
    if len(text.strip()) < 200:
        st.warning("⚠️ The extracted text seems too short. Please check your file.")
        return False
return True
def validate_configuration():
"""Validate system configuration"""
issues = []
# Check API key
if not os.getenv("GOOGLE_API_KEY") and not st.session_state.get("api_key"):
issues.append("β Google API Key not configured")
# Check optional packages
if not HAS_DOCX:
issues.append("β οΈ Optional: Install python-docx for better DOCX support (pip install python-docx)")
if not HAS_PDFPLUMBER:
issues.append("β οΈ Optional: Install pdfplumber for better PDF extraction (pip install pdfplumber)")
if not HAS_TIKTOKEN:
issues.append("β οΈ Optional: Install tiktoken for better token estimation (pip install tiktoken)")
return issues
@st.cache_data
def perform_enhanced_analysis(resume_text, job_description, analysis_type, custom_query=None, force_primary=True):
"""Main analysis function with all improvements"""
# Initialize cache
init_cache()
# Optimize content length
optimized_resume, optimized_job = optimize_content_length(resume_text, job_description)
# Create consistency hash
consistency_hash = create_consistency_hash(optimized_resume, optimized_job, analysis_type)
# Try to get from cache first
model_id = get_available_model(force_primary=force_primary)
cached_response = get_cached_response(consistency_hash, model_id)
if cached_response:
st.success("β‘ Retrieved from cache for consistency")
return cached_response, consistency_hash
# Select prompt
prompt_map = {
"evaluate": "evaluate_resume",
"improve": "improve_skills",
"keywords": "missing_keywords",
"match": "percentage_match",
"executive": "executive_assessment",
"transition": "career_transition",
"custom": "answer_query"
}
base_prompt = SPECIALIZED_PROMPTS[prompt_map.get(analysis_type, "evaluate_resume")]
if analysis_type == "custom" and custom_query:
base_prompt = f"{base_prompt}\n\nSPECIFIC QUERY: {custom_query}"
# Generate response
response = get_consistent_gemini_response(
model_id, base_prompt, optimized_resume, optimized_job, consistency_hash
)
# Cache the response
    if response and not response.startswith("⚠️"):
cache_response(consistency_hash, response, model_id)
return response, consistency_hash
# Streamlit App Configuration
st.set_page_config(
page_title="Smart ATS System",
page_icon="π",
layout="wide",
initial_sidebar_state="expanded"
)
# Custom CSS for professional UI
st.markdown("""
<style>
.main-header {
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
padding: 2rem;
border-radius: 15px;
text-align: center;
color: white;
margin-bottom: 2rem;
box-shadow: 0 10px 30px rgba(0,0,0,0.1);
}
.feature-card {
background: linear-gradient(135deg, #f8f9fa 0%, #e9ecef 100%);
padding: 1.5rem;
border-radius: 12px;
border-left: 5px solid #667eea;
margin: 1rem 0;
box-shadow: 0 5px 15px rgba(0,0,0,0.08);
}
.metric-container {
background: linear-gradient(135deg, #28a745 0%, #20c997 100%);
color: white;
padding: 1.5rem;
border-radius: 12px;
text-align: center;
margin: 1rem 0;
}
.consistency-badge {
background: #ffc107;
color: #212529;
padding: 0.5rem 1rem;
border-radius: 20px;
font-weight: bold;
display: inline-block;
margin: 0.5rem 0;
}
.stButton > button {
height: 3rem;
font-weight: 600;
}
</style>
""", unsafe_allow_html=True)
# Main Header
st.markdown("""
<div class="main-header">
<h1>🚀 Smart ATS System</h1>
<p>AI-Powered Resume Analysis with Guaranteed Consistency | Global Multi-Domain Support</p>
<div class="consistency-badge">✅ Consistent Results Guaranteed</div>
</div>
""", unsafe_allow_html=True)
# Sidebar Configuration
with st.sidebar:
    st.markdown("### 🔐 Configuration")
    st.markdown("[Get your Google API Key](https://aistudio.google.com/app/apikey)")
    api_key = st.text_input("🔑 Google API Key", type="password", help="Your Gemini API key for AI analysis")
    st.session_state["api_key"] = api_key
    # Model selection option
    force_primary = st.checkbox("🎯 Force Primary Model", value=True, help="Use gemini-2.5-flash directly without testing")
    st.session_state["force_primary"] = force_primary
    if api_key:
        try:
            genai.configure(api_key=api_key)
            model_id = get_available_model(force_primary=force_primary)
            st.success(f"✅ Connected to {model_id}")
        except Exception as e:
            st.error(f"❌ API Key Error: {str(e)}")
    st.markdown("### 📄 Document Upload")
    uploaded_files = st.file_uploader(
        "📄 Upload Resume (PDF/DOCX)",
        type=["pdf", "docx"],
        accept_multiple_files=True,
        help="Upload your resume in PDF or DOCX format"
    )
    if uploaded_files:
        st.success(f"✅ {len(uploaded_files)} file(s) uploaded successfully!")
# Main Interface
st.markdown("### π Job Description Input")
input_text = st.text_area(
"Paste the complete job description here:",
height=150,
placeholder="Enter the full job description including requirements, responsibilities, and qualifications...",
help="The more detailed the job description, the more accurate the analysis"
)
# Analysis Buttons
st.markdown("### π― Choose Analysis Type")
col1, col2, col3 = st.columns(3)
with col1:
evaluate_btn = st.button("π Complete Evaluation", use_container_width=True)
improve_btn = st.button("π Skill Enhancement", use_container_width=True)
with col2:
keywords_btn = st.button("π Keyword Analysis", use_container_width=True)
match_btn = st.button("π― Match Percentage", use_container_width=True)
with col3:
executive_btn = st.button("π Executive Assessment", use_container_width=True)
transition_btn = st.button("π Career Transition", use_container_width=True)
# Custom Query Section
with st.expander("π¬ Ask Custom Question"):
custom_query = st.text_input("Ask any specific question about the resume or career advice:")
query_btn = st.button("π€ Get Expert Answer")
# Analysis Logic
analysis_triggered = any([evaluate_btn, improve_btn, keywords_btn, match_btn, executive_btn, transition_btn, query_btn])
if analysis_triggered:
# Validation
errors = []
    if not api_key:
        errors.append("🔑 Please enter your Google API Key")
    if not uploaded_files:
        errors.append("📄 Please upload your resume")
    if not input_text:
        errors.append("📋 Please paste the job description")
if errors:
for error in errors:
st.error(error)
else:
# Process analysis
with st.spinner("π Analyzing with advanced AI algorithms..."):
pdf_content = enhanced_pdf_processing(uploaded_files)
# Validate content
if not validate_resume_content(pdf_content):
st.warning("β οΈ Please verify that your uploaded file is a valid resume.")
# Determine analysis type
if evaluate_btn:
analysis_type = "evaluate"
elif improve_btn:
analysis_type = "improve"
elif keywords_btn:
analysis_type = "keywords"
elif match_btn:
analysis_type = "match"
elif executive_btn:
analysis_type = "executive"
elif transition_btn:
analysis_type = "transition"
elif query_btn:
analysis_type = "custom"
try:
# Get force_primary setting
force_primary_setting = st.session_state.get("force_primary", True)
# Perform analysis
response, consistency_hash = perform_enhanced_analysis(
pdf_content, input_text, analysis_type, custom_query, force_primary_setting
)
# Display results
st.markdown("## π Analysis Results")
# Show metadata
col1, col2 = st.columns(2)
with col1:
st.markdown(f"**Consistency ID:** `{consistency_hash[:8]}`")
with col2:
st.markdown(f"**Analysis Type:** {analysis_type.title()}")
st.markdown("---")
st.markdown(response)
# Show content optimization info
if st.checkbox("π Show Content Optimization Details"):
optimized_resume, optimized_job = optimize_content_length(pdf_content, input_text)
col1, col2 = st.columns(2)
with col1:
st.metric("Resume Tokens", estimate_tokens(optimized_resume))
st.metric("Original Resume Length", len(pdf_content))
with col2:
st.metric("Job Description Tokens", estimate_tokens(optimized_job))
st.metric("Original Job Length", len(input_text))
except Exception as e:
st.error(f"Analysis failed: {str(e)}")
st.info("Please try again with a shorter document or check your API key.")
# Footer
st.markdown("---")
st.markdown("""
<div style="text-align: center; color: #666;">
<p>🚀 Smart ATS System | Powered by Advanced AI | Consistent • Reliable • Universal</p>
<p>Built with cutting-edge strategies for maximum ATS compatibility and career success</p>
</div>
""", unsafe_allow_html=True)
# Initialize cache on startup
if __name__ == "__main__":
init_cache() |