## Ultimate Smart ATS System - June 2025 Edition
## Incorporates Latest AI Strategies with Deterministic Consistency

import streamlit as st
import os
import re
import tempfile
import sqlite3
import hashlib
import time
from datetime import datetime
from pathlib import Path
from functools import wraps
from PyPDF2 import PdfReader
import google.generativeai as genai
from dotenv import load_dotenv

# Optional imports with fallbacks
try:
    import pdfplumber
    HAS_PDFPLUMBER = True
except ImportError:
    HAS_PDFPLUMBER = False

try:
    import docx
    HAS_DOCX = True
except ImportError:
    HAS_DOCX = False

try:
    import tiktoken
    HAS_TIKTOKEN = True
except ImportError:
    HAS_TIKTOKEN = False

# Load environment variables (e.g. GOOGLE_API_KEY) from .env
load_dotenv()

# Create cache directory
CACHE_DIR = Path("ats_cache")
CACHE_DIR.mkdir(exist_ok=True)

# MASTER UNIVERSAL SYSTEM PROMPT - Designed for Maximum Consistency & Global Applicability
UNIVERSAL_MASTER_PROMPT = """
You are the ULTIMATE ATS OPTIMIZATION ENGINE 3.0 - A state-of-the-art AI system designed to provide CONSISTENT, PRECISE, and GLOBALLY APPLICABLE resume analysis across ALL industries, roles, and experience levels.

**CORE OPERATING PRINCIPLES (June 2025 Standards):**

**CONSISTENCY PROTOCOL:**
- Always follow the EXACT same evaluation methodology for identical inputs
- Use standardized scoring matrices to ensure reproducible results
- Apply deterministic analysis patterns to eliminate variability
- Maintain consistent terminology and assessment criteria across all evaluations

**UNIVERSAL EVALUATION FRAMEWORK:**

**PHASE 1: TECHNICAL PARSING & KEYWORD OPTIMIZATION (25 Points)**
1. **ATS Compatibility Analysis:**
   - Format compliance score (reverse-chronological preferred)
   - Keyword density and natural integration assessment
   - Critical missing terms identification (high-impact keywords only)
   - Semantic relevance evaluation

2. **Section-by-Section Parsing:**
   - Contact Information: Completeness and professionalism
   - Professional Summary: Impact and relevance
   - Experience: Achievement quantification and progression
   - Skills: Technical and soft skills alignment
   - Education: Relevance and credential verification

**PHASE 2: COMPETENCY MATCHING ANALYSIS (35 Points)**
1. **Hard Skills Assessment (20 points):**
   - Technical competencies directly matching job requirements
   - Tools, software, and platform expertise
   - Industry-specific knowledge and certifications
   - Quantifiable achievements and metrics

2. **Soft Skills Evaluation (15 points):**
   - Leadership and communication indicators
   - Problem-solving and analytical thinking evidence
   - Adaptability and learning agility markers
   - Team collaboration and project management skills

**PHASE 3: EXPERIENCE RELEVANCE SCORING (25 Points)**
1. **Direct Experience Match (15 points):**
   - Same industry and role experience
   - Progressive responsibility growth
   - Relevant project complexity and scale

2. **Transferable Experience (10 points):**
   - Adjacent industry or functional experience
   - Cross-domain skills applicability
   - Leadership and management experience

**PHASE 4: CULTURAL & GROWTH POTENTIAL (15 Points)**
1. **Growth Trajectory Analysis:**
   - Career progression patterns
   - Continuous learning evidence
   - Innovation and initiative indicators

2. **Cultural Alignment Markers:**
   - Values demonstration through experience
   - Collaboration and team success stories
   - Adaptability to organizational change

**STANDARDIZED OUTPUT STRUCTURE:**
Always provide results in this EXACT format for consistency:

**🎯 OVERALL MATCH SCORE: [XX/100]**

**📊 DETAILED BREAKDOWN:**
- Technical Parsing: [XX/25]
- Competency Matching: [XX/35]
- Experience Relevance: [XX/25]
- Growth Potential: [XX/15]

**✅ TOP 5 STRENGTHS:**
1. [Specific strength with evidence]
2. [Specific strength with evidence]
3. [Specific strength with evidence]
4. [Specific strength with evidence]
5. [Specific strength with evidence]

**🔧 TOP 3 IMPROVEMENT AREAS:**
1. [Specific area with actionable suggestion]
2. [Specific area with actionable suggestion]
3. [Specific area with actionable suggestion]

**🔑 CRITICAL MISSING KEYWORDS:**
- [High-impact keyword 1]
- [High-impact keyword 2]
- [High-impact keyword 3]

**💡 STRATEGIC RECOMMENDATIONS:**
[3-4 specific, actionable recommendations]

**🏆 FINAL VERDICT:**
[EXCEPTIONAL 90-100 | STRONG 75-89 | GOOD 60-74 | DEVELOPING 45-59 | NEEDS WORK <45]

**CONSISTENCY GUARANTEES:**
- Same resume + same job description = identical analysis (±2 points variation max)
- Standardized language and terminology across all evaluations
- Reproducible scoring methodology regardless of domain
- Time-consistent results (same analysis today and tomorrow)
"""

# Specialized prompts that extend the master prompt for specific use cases
SPECIALIZED_PROMPTS = {
    "evaluate_resume": f"""
{UNIVERSAL_MASTER_PROMPT}
**SPECIFIC TASK: COMPREHENSIVE RESUME EVALUATION**
Apply the Universal Evaluation Framework above to provide a complete assessment.
Focus on overall candidacy evaluation with balanced perspective on strengths and development areas.
Maintain professional tone suitable for HR professionals and hiring managers.
""",

    "improve_skills": f"""
{UNIVERSAL_MASTER_PROMPT}
**SPECIFIC TASK: SKILL ENHANCEMENT STRATEGY**
After completing the standard evaluation, provide additional guidance:
**📈 SKILL DEVELOPMENT ROADMAP:**
- **Immediate Actions (0-3 months):** Quick wins and foundational improvements
- **Short-term Goals (3-12 months):** Structured learning and certification paths
- **Long-term Vision (1-3 years):** Strategic career advancement opportunities
**🎓 LEARNING RESOURCES:**
- Recommended courses, certifications, and training programs
- Industry conferences and networking opportunities
- Practical projects and portfolio development suggestions
Focus on actionable, measurable improvement strategies with clear timelines.
""",

    "missing_keywords": f"""
{UNIVERSAL_MASTER_PROMPT}
**SPECIFIC TASK: ATS KEYWORD OPTIMIZATION**
After completing the standard evaluation, provide enhanced keyword analysis:
**πŸ” ADVANCED KEYWORD ANALYSIS:**
- **CRITICAL MISSING (High Impact):** Essential terms significantly affecting ATS ranking
- **IMPORTANT ADDITIONS (Medium Impact):** Valuable terms improving visibility
- **OPTIMIZATION OPPORTUNITIES (Low Impact):** Supplementary terms for comprehensive coverage
**πŸ“ INTEGRATION STRATEGY:**
- Specific resume sections for keyword placement
- Natural integration techniques avoiding keyword stuffing
- Industry-appropriate phrasing and terminology
**πŸ€– ATS COMPATIBILITY SCORE:** [Detailed breakdown of parsing efficiency]
""",

    "percentage_match": f"""
{UNIVERSAL_MASTER_PROMPT}
**SPECIFIC TASK: PRECISE MATCHING ANALYSIS**
Provide the standard evaluation with enhanced quantitative focus:
**📊 DETAILED SCORING BREAKDOWN:**
Present exact point allocation for each category with clear justification.
Include competitive benchmarking and market positioning analysis.
Provide specific improvement strategies for 10-15% score increase.
**🎯 MATCH PERCENTAGE: [XX%]**
Tier Classification with detailed rationale and next steps.
""",

    "answer_query": f"""
{UNIVERSAL_MASTER_PROMPT}
**SPECIFIC TASK: EXPERT CONSULTATION**
Apply domain expertise to answer the specific query while considering:
- Resume content and job description context
- Industry best practices and current market trends
- Practical, actionable guidance
- Evidence-based recommendations
Provide thorough, well-researched responses with specific examples and multiple solution approaches when applicable.
""",

    "executive_assessment": f"""
{UNIVERSAL_MASTER_PROMPT}
**SPECIFIC TASK: EXECUTIVE-LEVEL EVALUATION**
Apply enhanced criteria for senior leadership positions:
**👔 EXECUTIVE COMPETENCY FRAMEWORK:**
- Strategic thinking and vision development
- Change management and transformation leadership
- Financial acumen and business impact
- Board readiness and governance experience
**📈 LEADERSHIP IMPACT ANALYSIS:**
- Quantifiable business results and achievements
- Market expansion and competitive positioning
- Organizational culture and talent development
- Crisis leadership and resilience
Provide insights suitable for C-suite and board-level discussions.
""",

    "career_transition": f"""
{UNIVERSAL_MASTER_PROMPT}
**SPECIFIC TASK: CAREER PIVOT ANALYSIS**
Evaluate career change feasibility with:
**🔄 TRANSITION ASSESSMENT:**
- Transferable skills mapping across industries
- Market positioning strategy for career change
- Risk mitigation and success probability analysis
- Timeline and milestone planning
**🎯 TRANSITION ROADMAP:**
- Phase-wise transition strategy
- Skill development priorities
- Network building and industry immersion plan
Provide strategic guidance maximizing transition success while minimizing career risks.
"""
}
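
# Note: every specialized prompt embeds the full master prompt, so all
# analysis paths share a single scoring rubric; e.g.
# SPECIALIZED_PROMPTS["missing_keywords"] is the master framework with the
# keyword-specific instructions appended.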

# Enhanced configuration for consistency
GENERATION_CONFIG = {
    "temperature": 0.15,  # Low temperature for maximum consistency
    "top_p": 0.8,
    "top_k": 40,
    "max_output_tokens": 4096,
    "stop_sequences": [],
}
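
# Even at temperature 0.15, sampling is not strictly deterministic; the
# SQLite response cache below is what actually makes repeat analyses
# byte-identical for the same inputs.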

# Model options optimized for consistency and performance
MODEL_FALLBACK_CHAIN = [
    "gemini-2.5-flash",                      # πŸ₯‡ PRIMARY - Latest Gemini 2.5
    "gemini-2.5-flash-lite-preview-06-17"   # πŸ₯ˆ FALLBACK - Lite version
]

# Rate limiting decorator
def rate_limit(min_interval=2):
    def decorator(func):
        last_called = [0]
        @wraps(func)
        def wrapper(*args, **kwargs):
            elapsed = time.time() - last_called[0]
            left_to_wait = min_interval - elapsed
            if left_to_wait > 0:
                time.sleep(left_to_wait)
            result = func(*args, **kwargs)
            last_called[0] = time.time()
            return result
        return wrapper
    return decorator
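
# A minimal usage sketch (hypothetical function name): any callable wrapped
# with @rate_limit(min_interval=2) sleeps so that consecutive calls are spaced
# at least 2 seconds apart. The one-element list `last_called` lets the inner
# wrapper mutate shared state without needing `nonlocal`.
#
#   @rate_limit(min_interval=2)
#   def call_gemini():
#       ...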

# Cache management functions
def init_cache():
    """Initialize SQLite cache for consistency"""
    conn = sqlite3.connect(CACHE_DIR / "ats_cache.db")
    cursor = conn.cursor()
    cursor.execute("""
        CREATE TABLE IF NOT EXISTS analysis_cache (
            hash_key TEXT PRIMARY KEY,
            response TEXT,
            timestamp DATETIME DEFAULT CURRENT_TIMESTAMP,
            model_used TEXT
        )
    """)
    conn.commit()
    conn.close()

def get_cached_response(consistency_hash, model_id):
    """Get cached response if available"""
    try:
        conn = sqlite3.connect(CACHE_DIR / "ats_cache.db")
        cursor = conn.cursor()
        cursor.execute(
            "SELECT response FROM analysis_cache WHERE hash_key = ? AND model_used = ?",
            (consistency_hash, model_id)
        )
        result = cursor.fetchone()
        conn.close()
        return result[0] if result else None
    except Exception:
        return None

def cache_response(consistency_hash, response, model_id):
    """Cache the response for future use"""
    try:
        conn = sqlite3.connect(CACHE_DIR / "ats_cache.db")
        cursor = conn.cursor()
        cursor.execute(
            "INSERT OR REPLACE INTO analysis_cache (hash_key, response, model_used) VALUES (?, ?, ?)",
            (consistency_hash, response, model_id)
        )
        conn.commit()
        conn.close()
    except Exception as e:
        st.warning(f"Cache save failed: {e}")

# Token estimation and content optimization
def estimate_tokens(text, model="gpt-3.5-turbo"):
    """Estimate token count for text"""
    if HAS_TIKTOKEN:
        try:
            encoding = tiktoken.encoding_for_model(model)
            return len(encoding.encode(text))
        except Exception:
            pass
    # Fallback estimation: roughly 4 characters per token
    return len(text) // 4
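
# Both estimation paths are approximate: the 4-characters-per-token fallback
# is a rough rule of thumb for English prose, and tiktoken's GPT encodings
# only approximate Gemini's actual tokenizer, so treat counts as estimates.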

def optimize_content_length(resume_text, job_description, max_resume_tokens=2000, max_job_tokens=1500):
    """Optimize content length to stay within token limits"""
    
    # Prioritize key sections in resume
    resume_sections = {
        'experience': [],
        'skills': [],
        'education': [],
        'summary': []
    }
    
    # Simple section detection
    lines = resume_text.split('\n')
    current_section = 'summary'
    
    for line in lines:
        line_lower = line.lower().strip()
        if any(keyword in line_lower for keyword in ['experience', 'work', 'employment']):
            current_section = 'experience'
        elif any(keyword in line_lower for keyword in ['skills', 'technical', 'competencies']):
            current_section = 'skills'
        elif any(keyword in line_lower for keyword in ['education', 'academic', 'degree']):
            current_section = 'education'
        
        if line.strip():
            resume_sections[current_section].append(line)
    
    # Build optimized resume content
    optimized_resume = []
    
    # Add summary (first 300 chars)
    if resume_sections['summary']:
        summary_text = '\n'.join(resume_sections['summary'][:5])
        optimized_resume.append(f"PROFESSIONAL SUMMARY:\n{summary_text[:300]}")
    
    # Add experience (prioritize recent)
    if resume_sections['experience']:
        exp_text = '\n'.join(resume_sections['experience'][:15])
        optimized_resume.append(f"WORK EXPERIENCE:\n{exp_text[:800]}")
    
    # Add skills
    if resume_sections['skills']:
        skills_text = '\n'.join(resume_sections['skills'][:8])
        optimized_resume.append(f"SKILLS:\n{skills_text[:400]}")
    
    # Add education
    if resume_sections['education']:
        edu_text = '\n'.join(resume_sections['education'][:5])
        optimized_resume.append(f"EDUCATION:\n{edu_text[:200]}")
    
    optimized_resume_text = '\n\n'.join(optimized_resume)
    
    # Ensure we're within token limits
    resume_tokens = estimate_tokens(optimized_resume_text)
    if resume_tokens > max_resume_tokens:
        # Truncate if still too long
        chars_per_token = len(optimized_resume_text) / resume_tokens
        max_chars = int(max_resume_tokens * chars_per_token)
        optimized_resume_text = optimized_resume_text[:max_chars] + "... [truncated]"
    
    # Optimize job description
    job_lines = job_description.split('\n')
    important_lines = []
    
    for line in job_lines:
        line_lower = line.lower()
        # Prioritize lines with key information
        if any(keyword in line_lower for keyword in [
            'require', 'must', 'essential', 'experience', 'skill', 
            'qualification', 'bachelor', 'master', 'year', 'certification'
        ]):
            important_lines.append(line)
        elif line.strip() and len(important_lines) < 20:
            important_lines.append(line)
    
    optimized_job = '\n'.join(important_lines)
    
    # Ensure job description is within limits
    job_tokens = estimate_tokens(optimized_job)
    if job_tokens > max_job_tokens:
        chars_per_token = len(optimized_job) / job_tokens
        max_chars = int(max_job_tokens * chars_per_token)
        optimized_job = optimized_job[:max_chars] + "... [truncated]"
    
    return optimized_resume_text, optimized_job
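
# Note: the section detection above is keyword-based and heuristic, so
# unconventional resume layouts may land lines in the wrong bucket; the
# per-section character caps plus the token checks keep the combined prompt
# comfortably inside the model's context window.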

def create_consistency_hash(resume_text, job_description, prompt_type):
    """Create a hash for identical inputs to ensure consistent outputs"""
    content = f"{resume_text[:1000]}{job_description[:1000]}{prompt_type}"
    return hashlib.md5(content.encode()).hexdigest()
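
# Only the first 1,000 characters of each input feed the hash, so two long
# documents that differ only beyond that prefix will share a cache entry: a
# deliberate trade-off that favors cache hits over strict uniqueness.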

def get_available_model(force_primary=True):
    """Get the first available model from the fallback chain"""
    if force_primary:
        # Force use of primary model without testing
        return MODEL_FALLBACK_CHAIN[0]  # gemini-2.5-flash
    
    for model in MODEL_FALLBACK_CHAIN:
        try:
            st.info(f"πŸ” Testing model: {model}")
            
            test_model = genai.GenerativeModel(
                model,
                safety_settings={
                    genai.types.HarmCategory.HARM_CATEGORY_HARASSMENT: genai.types.HarmBlockThreshold.BLOCK_NONE,
                    genai.types.HarmCategory.HARM_CATEGORY_HATE_SPEECH: genai.types.HarmBlockThreshold.BLOCK_NONE,
                    genai.types.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: genai.types.HarmBlockThreshold.BLOCK_NONE,
                    genai.types.HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: genai.types.HarmBlockThreshold.BLOCK_NONE,
                }
            )
            
            # Test with a simple prompt
            test_response = test_model.generate_content(
                "Say 'OK'",
                generation_config=genai.types.GenerationConfig(
                    temperature=0.1,
                    max_output_tokens=10
                )
            )
            
            if test_response.text:
                st.success(f"βœ… {model} - Test successful!")
                return model
            else:
                st.warning(f"⚠️ {model} - No response text")
                
        except Exception as e:
            st.error(f"❌ {model} - Failed: {str(e)}")
            continue
    
    raise Exception("No available Gemini models found")

@rate_limit(min_interval=2)
def get_consistent_gemini_response(model_id, prompt, pdf_content, input_text, consistency_hash):
    """Enhanced response generation with robust error handling"""
    try:
        model = genai.GenerativeModel(
            model_id,
            safety_settings={
                genai.types.HarmCategory.HARM_CATEGORY_HARASSMENT: genai.types.HarmBlockThreshold.BLOCK_NONE,
                genai.types.HarmCategory.HARM_CATEGORY_HATE_SPEECH: genai.types.HarmBlockThreshold.BLOCK_NONE,
                genai.types.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: genai.types.HarmBlockThreshold.BLOCK_NONE,
                genai.types.HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: genai.types.HarmBlockThreshold.BLOCK_NONE,
            }
        )
        
        # Add consistency instruction to prompt
        enhanced_prompt = f"""
{prompt}

**CONSISTENCY PROTOCOL ACTIVE:**
Session ID: {consistency_hash}
Evaluation Date: {datetime.now().strftime('%Y-%m-%d')}
Apply identical methodology and scoring for consistent results.
Use deterministic analysis patterns and standardized language.

**RESUME CONTENT:**
{pdf_content[:3000]}

**JOB DESCRIPTION:**
{input_text[:2000]}
"""
        
        response = model.generate_content(
            enhanced_prompt,
            generation_config=genai.types.GenerationConfig(**GENERATION_CONFIG)
        )
        
        # Enhanced error checking
        if hasattr(response, 'candidates') and response.candidates:
            candidate = response.candidates[0]
            
            # Check finish reason
            if hasattr(candidate, 'finish_reason'):
                if candidate.finish_reason == 1:  # STOP - Normal completion
                    return response.text if hasattr(response, 'text') and response.text else "Analysis completed but no content returned."
                elif candidate.finish_reason == 2:  # MAX_TOKENS
                    return "⚠️ Analysis truncated due to length. Please try with a shorter resume or job description."
                elif candidate.finish_reason == 3:  # SAFETY
                    return "⚠️ Content filtered for safety. Please review your input for any potentially problematic content."
                elif candidate.finish_reason == 4:  # RECITATION
                    return "⚠️ Content blocked due to recitation concerns. Please try rephrasing your input."
                else:
                    return f"⚠️ Generation stopped with reason: {candidate.finish_reason}"
            
            # Try to get text anyway
            try:
                return response.text if response.text else "No analysis content generated."
            except Exception:
                return "Analysis completed but content could not be retrieved."
        
        return "No response candidates generated. Please try again."
        
    except Exception as e:
        st.error(f"⚠️ Analysis Error: {str(e)}")
        
        # Fallback with a simpler configuration, reusing the lightest model in
        # the chain rather than the legacy "gemini-pro" identifier
        try:
            simple_model = genai.GenerativeModel(MODEL_FALLBACK_CHAIN[-1])
            simple_prompt = f"Analyze this resume against the job description:\n\nResume: {pdf_content[:1000]}\n\nJob: {input_text[:1000]}"
            
            fallback_response = simple_model.generate_content(simple_prompt)
            return f"⚠️ Using fallback analysis:\n\n{fallback_response.text}"
        except Exception:
            return "Unable to complete analysis. Please check your API key, reduce content length, and try again."

def clean_extracted_text(text):
    """Clean and format extracted text"""
    # Remove excessive whitespace
    text = re.sub(r'\n\s*\n\s*\n', '\n\n', text)
    text = re.sub(r'[ \t]+', ' ', text)
    
    # Fix common extraction issues
    text = re.sub(r'([a-z])([A-Z])', r'\1 \2', text)  # Add space before capitals
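    # e.g. 'SeniorEngineerGoogle' -> 'Senior Engineer Google'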
    text = re.sub(r'(\w)([•·▪▫])', r'\1 \2', text)  # Space before bullets
    text = re.sub(r'([•·▪▫])(\w)', r'\1 \2', text)  # Space after bullets
    
    # Remove page markers
    text = re.sub(r'--- Page \d+ ---', '', text)
    
    # Normalize line endings
    text = text.replace('\r\n', '\n').replace('\r', '\n')
    
    # Remove empty lines at start and end
    text = text.strip()
    
    return text

def enhanced_pdf_processing(pdf_docs):
    """Enhanced PDF processing with better text extraction and formatting"""
    text = ""
    
    for doc in pdf_docs:
        try:
            if doc.name.endswith(".pdf"):
                # Save uploaded file temporarily
                with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
                    tmp_file.write(doc.getvalue())
                    tmp_path = tmp_file.name
                
                try:
                    # Try multiple extraction methods
                    pdf_reader = PdfReader(tmp_path)
                    extracted_text = ""
                    
                    for page_num, page in enumerate(pdf_reader.pages):
                        page_text = page.extract_text()
                        
                        # Clean up common PDF extraction issues
                        page_text = re.sub(r'\s+', ' ', page_text)  # Normalize whitespace
                        page_text = re.sub(r'([a-z])([A-Z])', r'\1 \2', page_text)  # Add spaces between words
                        
                        extracted_text += f"\n--- Page {page_num + 1} ---\n{page_text}\n"
                    
                    # If extraction is poor, try alternative method
                    if len(extracted_text.strip()) < 100 and HAS_PDFPLUMBER:
                        try:
                            with pdfplumber.open(tmp_path) as pdf:
                                for page in pdf.pages:
                                    page_text = page.extract_text()
                                    if page_text:
                                        extracted_text += page_text + "\n"
                        except Exception:
                            pass
                    
                    text += extracted_text
                    
                finally:
                    # Clean up temporary file
                    os.unlink(tmp_path)
                    
            elif doc.name.endswith(".docx") and HAS_DOCX:
                try:
                    # Save uploaded file temporarily
                    with tempfile.NamedTemporaryFile(delete=False, suffix=".docx") as tmp_file:
                        tmp_file.write(doc.getvalue())
                        tmp_path = tmp_file.name
                    
                    try:
                        doc_reader = docx.Document(tmp_path)
                        
                        # Extract paragraphs
                        for para in doc_reader.paragraphs:
                            if para.text.strip():
                                text += para.text + "\n"
                        
                        # Extract tables
                        for table in doc_reader.tables:
                            for row in table.rows:
                                row_text = " | ".join([cell.text.strip() for cell in row.cells])
                                if row_text.strip():
                                    text += row_text + "\n"
                    
                    finally:
                        os.unlink(tmp_path)
                        
                except Exception as e:
                    st.error(f"πŸ“‹ Error processing DOCX {doc.name}: {str(e)}")
                    
        except Exception as e:
            st.error(f"πŸ“„ Error processing {doc.name}: {str(e)}")
            continue
    
    # Clean and format the extracted text
    text = clean_extracted_text(text)
    return text

def validate_resume_content(text):
    """Validate that the extracted text looks like a resume"""
    text_lower = text.lower()
    
    # Check for common resume indicators
    resume_indicators = [
        'experience', 'education', 'skills', 'work', 'employment',
        'university', 'college', 'degree', 'certification', 'project',
        'email', 'phone', 'address', 'linkedin'
    ]
    
    found_indicators = sum(1 for indicator in resume_indicators if indicator in text_lower)
    
    if found_indicators < 3:
        st.warning("⚠️ The uploaded file may not be a resume. Please verify the content.")
        return False
    
    if len(text.strip()) < 200:
        st.warning("⚠️ The extracted text seems too short. Please check your file.")
        return False
    
    return True

def validate_configuration():
    """Validate system configuration"""
    issues = []
    
    # Check API key
    if not os.getenv("GOOGLE_API_KEY") and not st.session_state.get("api_key"):
        issues.append("❌ Google API Key not configured")
    
    # Check optional packages
    if not HAS_DOCX:
        issues.append("⚠️ Optional: Install python-docx for better DOCX support (pip install python-docx)")
    
    if not HAS_PDFPLUMBER:
        issues.append("⚠️ Optional: Install pdfplumber for better PDF extraction (pip install pdfplumber)")
    
    if not HAS_TIKTOKEN:
        issues.append("⚠️ Optional: Install tiktoken for better token estimation (pip install tiktoken)")
    
    return issues
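
# validate_configuration() is a diagnostic helper and is not currently wired
# into the UI below; if desired, it could be surfaced in the sidebar, e.g.:
#
#   for issue in validate_configuration():
#       st.sidebar.warning(issue)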

@st.cache_data
def perform_enhanced_analysis(resume_text, job_description, analysis_type, custom_query=None, force_primary=True):
    """Main analysis function with all improvements"""
    
    # Initialize cache
    init_cache()
    
    # Optimize content length
    optimized_resume, optimized_job = optimize_content_length(resume_text, job_description)
    
    # Create consistency hash
    consistency_hash = create_consistency_hash(optimized_resume, optimized_job, analysis_type)
    
    # Try to get from cache first
    model_id = get_available_model(force_primary=force_primary)
    cached_response = get_cached_response(consistency_hash, model_id)
    
    if cached_response:
        st.success("⚑ Retrieved from cache for consistency")
        return cached_response, consistency_hash
    
    # Select prompt
    prompt_map = {
        "evaluate": "evaluate_resume",
        "improve": "improve_skills", 
        "keywords": "missing_keywords",
        "match": "percentage_match",
        "executive": "executive_assessment",
        "transition": "career_transition",
        "custom": "answer_query"
    }
    
    base_prompt = SPECIALIZED_PROMPTS[prompt_map.get(analysis_type, "evaluate_resume")]
    
    if analysis_type == "custom" and custom_query:
        base_prompt = f"{base_prompt}\n\nSPECIFIC QUERY: {custom_query}"
    
    # Generate response
    response = get_consistent_gemini_response(
        model_id, base_prompt, optimized_resume, optimized_job, consistency_hash
    )
    
    # Cache the response
    if response and not response.startswith("⚠️"):
        cache_response(consistency_hash, response, model_id)
    
    return response, consistency_hash
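
# Two cache layers apply to perform_enhanced_analysis: @st.cache_data memoizes
# results in memory for the running session, while the SQLite cache persists
# identical analyses (keyed by consistency hash + model) across restarts.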

# Streamlit App Configuration
st.set_page_config(
    page_title="Smart ATS System", 
    page_icon="πŸš€",
    layout="wide",
    initial_sidebar_state="expanded"
)

# Custom CSS for professional UI
st.markdown("""
<style>
    .main-header {
        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
        padding: 2rem;
        border-radius: 15px;
        text-align: center;
        color: white;
        margin-bottom: 2rem;
        box-shadow: 0 10px 30px rgba(0,0,0,0.1);
    }
    .feature-card {
        background: linear-gradient(135deg, #f8f9fa 0%, #e9ecef 100%);
        padding: 1.5rem;
        border-radius: 12px;
        border-left: 5px solid #667eea;
        margin: 1rem 0;
        box-shadow: 0 5px 15px rgba(0,0,0,0.08);
    }
    .metric-container {
        background: linear-gradient(135deg, #28a745 0%, #20c997 100%);
        color: white;
        padding: 1.5rem;
        border-radius: 12px;
        text-align: center;
        margin: 1rem 0;
    }
    .consistency-badge {
        background: #ffc107;
        color: #212529;
        padding: 0.5rem 1rem;
        border-radius: 20px;
        font-weight: bold;
        display: inline-block;
        margin: 0.5rem 0;
    }
    .stButton > button {
        height: 3rem;
        font-weight: 600;
    }
</style>
""", unsafe_allow_html=True)

# Main Header
st.markdown("""
<div class="main-header">
    <h1>🚀 Smart ATS System</h1>
    <p>AI-Powered Resume Analysis with Guaranteed Consistency | Global Multi-Domain Support</p>
    <div class="consistency-badge">✅ Consistent Results Guaranteed</div>
</div>
""", unsafe_allow_html=True)



# Sidebar Configuration
with st.sidebar:
    st.markdown("### πŸ”‘ Configuration")
    st.markdown("[Get your Google API Key](https://aistudio.google.com/app/apikey)")
    
    api_key = st.text_input("πŸ” Google API Key", type="password", help="Your Gemini API key for AI analysis")
    st.session_state["api_key"] = api_key
    
    # Model selection option
    force_primary = st.checkbox("🎯 Force Primary Model", value=True, help="Use gemini-2.5-flash directly without testing")
    st.session_state["force_primary"] = force_primary
    
    if api_key:
        try:
            genai.configure(api_key=api_key)
            model_id = get_available_model(force_primary=force_primary)
            st.success(f"βœ… Connected to {model_id}")
        except Exception as e:
            st.error(f"❌ API Key Error: {str(e)}")
    
    st.markdown("### πŸ“‚ Document Upload")
    uploaded_files = st.file_uploader(
        "πŸ“„ Upload Resume (PDF/DOCX)", 
        type=["pdf", "docx"], 
        accept_multiple_files=True,
        help="Upload your resume in PDF or DOCX format"
    )
    
    if uploaded_files:
        st.success(f"βœ… {len(uploaded_files)} file(s) uploaded successfully!")
        

# Main Interface
st.markdown("### πŸ“ Job Description Input")
input_text = st.text_area(
    "Paste the complete job description here:",
    height=150,
    placeholder="Enter the full job description including requirements, responsibilities, and qualifications...",
    help="The more detailed the job description, the more accurate the analysis"
)

# Analysis Buttons
st.markdown("### 🎯 Choose Analysis Type")
col1, col2, col3 = st.columns(3)

with col1:
    evaluate_btn = st.button("📊 Complete Evaluation", use_container_width=True)
    improve_btn = st.button("📈 Skill Enhancement", use_container_width=True)

with col2:
    keywords_btn = st.button("🔍 Keyword Analysis", use_container_width=True)
    match_btn = st.button("🎯 Match Percentage", use_container_width=True)

with col3:
    executive_btn = st.button("👔 Executive Assessment", use_container_width=True)
    transition_btn = st.button("🔄 Career Transition", use_container_width=True)

# Custom Query Section
with st.expander("πŸ’¬ Ask Custom Question"):
    custom_query = st.text_input("Ask any specific question about the resume or career advice:")
    query_btn = st.button("🤔 Get Expert Answer")

# Analysis Logic
analysis_triggered = any([evaluate_btn, improve_btn, keywords_btn, match_btn, executive_btn, transition_btn, query_btn])

if analysis_triggered:
    # Validation
    errors = []
    if not api_key:
        errors.append("πŸ” Please enter your Google API Key")
    if not uploaded_files:
        errors.append("πŸ“„ Please upload your resume")
    if not input_text:
        errors.append("πŸ“ Please paste the job description")
    
    if errors:
        for error in errors:
            st.error(error)
    else:
        # Process analysis
        with st.spinner("πŸ”„ Analyzing with advanced AI algorithms..."):
            pdf_content = enhanced_pdf_processing(uploaded_files)
            
            # Validate content
            if not validate_resume_content(pdf_content):
                st.warning("⚠️ Please verify that your uploaded file is a valid resume.")
            
            # Determine analysis type
            if evaluate_btn:
                analysis_type = "evaluate"
            elif improve_btn:
                analysis_type = "improve"
            elif keywords_btn:
                analysis_type = "keywords"
            elif match_btn:
                analysis_type = "match"
            elif executive_btn:
                analysis_type = "executive"
            elif transition_btn:
                analysis_type = "transition"
            elif query_btn:
                analysis_type = "custom"
            
            try:
                # Get force_primary setting
                force_primary_setting = st.session_state.get("force_primary", True)
                
                # Perform analysis
                response, consistency_hash = perform_enhanced_analysis(
                    pdf_content, input_text, analysis_type, custom_query, force_primary_setting
                )
                
                # Display results
                st.markdown("## πŸ“‹ Analysis Results")
                
                # Show metadata
                col1, col2 = st.columns(2)
                with col1:
                    st.markdown(f"**Consistency ID:** `{consistency_hash[:8]}`")
                with col2:
                    st.markdown(f"**Analysis Type:** {analysis_type.title()}")
                
                st.markdown("---")
                st.markdown(response)
                
            
                
                # Show content optimization info
                if st.checkbox("πŸ” Show Content Optimization Details"):
                    optimized_resume, optimized_job = optimize_content_length(pdf_content, input_text)
                    
                    col1, col2 = st.columns(2)
                    with col1:
                        st.metric("Resume Tokens", estimate_tokens(optimized_resume))
                        st.metric("Original Resume Length", len(pdf_content))
                    with col2:
                        st.metric("Job Description Tokens", estimate_tokens(optimized_job))
                        st.metric("Original Job Length", len(input_text))
                
            except Exception as e:
                st.error(f"Analysis failed: {str(e)}")
                st.info("Please try again with a shorter document or check your API key.")

# Footer
st.markdown("---")
st.markdown("""
<div style="text-align: center; color: #666;">
    <p>🚀 Smart ATS System | Powered by Advanced AI | Consistent • Reliable • Universal</p>
    <p>Built with cutting-edge strategies for maximum ATS compatibility and career success</p>
</div>
""", unsafe_allow_html=True)

# Initialize cache on startup
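# Under `streamlit run`, the script executes with __name__ == "__main__", so
# this guard warms the SQLite cache at startup; init_cache() is also called
# defensively inside perform_enhanced_analysis().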
if __name__ == "__main__":
    init_cache()