Spaces:

JinHeon0505
/

research-companion-api

Paused

File size: 4,539 Bytes

# ================================================================
# 파이프라인 v2 — Format Selector v2.6
# ================================================================
# v2.1 - [Fix-10] stage 매칭 정규식
# v2.2 - [Redesign] 8 TYPE + exploratory + checklist
# v2.6 - [F6] select_format 반환을 (format, reason) 튜플로 확장
#             — 포맷 선택 근거를 UI에 노출하기 위함.
#             — 하위 호환: select_format_simple(...) 은 기존 str 반환.
# ================================================================

import re

# ── Regexes for classifying the researcher's career stage ─────
# Senior markers (doctoral, postdoc, clinical, professor, researcher, PI),
# in Korean and English. Alternatives are OR-ed together and matched
# case-insensitively anywhere in the stage string.
SENIOR_PATTERN = re.compile(
    "|".join((
        r"박사",
        r"포스닥",
        r"포스트닥",
        r"임상",
        r"교수",
        r"연구원",
        r"연구자",
        r"phd",
        r"postdoc",
        r"post[- ]doc",
        r"researcher",
        r"professor",
        r"faculty",
        r"principal\s*investigator",
        r"\bpi\b",  # word boundaries keep "pi" from matching inside longer words
    )),
    flags=re.IGNORECASE,
)

# Junior markers (master's, undergraduate, bachelor's, expected graduation,
# graduate student), built the same way as SENIOR_PATTERN.
JUNIOR_PATTERN = re.compile(
    "|".join((
        r"석사",
        r"학부",
        r"학사",
        r"졸업예정",
        r"master",
        r"undergraduate",
        r"bachelor",
        r"\bms\b",
        r"\bbs\b",
        r"grad(uate)?\s*student",
    )),
    flags=re.IGNORECASE,
)

# Conflict types that select_format treats as "standard" methodology
# conflicts when routing junior researchers.
STANDARD_CONFLICTS = frozenset({
    "method_contradiction",
    "causation_from_correlation",
})

# Substrings that mark a logic warning as statistical. They are looked up
# with a plain, case-sensitive `in` test against the joined warning text.
STAT_WARNING_KEYWORDS = [
    "OLS",
    "회귀",
    "상관",
    "p-value",
    "내생성",
    "역인과",
    "통계",
    "검정",
    "분산",
    "표본",
    "Cox",
    "ANOVA",
    "regression",
    "correlation",
    "endogeneity",
]


def _is_senior(stage: str) -> bool:
    """Return True when ``stage`` contains any senior-stage marker.

    The marker may appear anywhere in the string; matching is delegated to
    a regex search with SENIOR_PATTERN.
    """
    return SENIOR_PATTERN.search(stage) is not None


def _is_junior(stage: str) -> bool:
    """Return True when ``stage`` contains any junior-stage marker.

    The marker may appear anywhere in the string; matching is delegated to
    a regex search with JUNIOR_PATTERN.
    """
    return JUNIOR_PATTERN.search(stage) is not None


def _has_stat_warning(logic_warnings: list) -> bool:
    """Return True when any statistical keyword occurs in the warnings.

    The warnings are joined with single spaces into one string, and each
    entry of STAT_WARNING_KEYWORDS is checked as a case-sensitive substring
    of that string.
    """
    joined = " ".join(logic_warnings)
    for keyword in STAT_WARNING_KEYWORDS:
        if keyword in joined:
            return True
    return False


# ================================================================
# 메인: (format, reason) 튜플 반환
# ================================================================
def select_format(
    conflict_type:    str,
    stage:            str,
    verdict:          str,
    logic_warnings:   list,
    venue:            str  = "",
    specific_concern: str  = "",
    rag_found:        bool = False,
    evidence_gap:     str  = "",
) -> tuple[str, str]:
    """Pick the output format and a one-line reason for the choice.

    Rules are evaluated top to bottom; the first one that matches wins:

      1. verdict == "PASS"                                  -> growth_feedback
      2. non-blank specific_concern                         -> concern_advisory
      3. rag_found + a conflict type + >= 2 warnings        -> visual_report
      4. causation_from_correlation + statistical warning   -> statistical_review
      5. non-blank venue + senior stage                     -> journal_fit_review
      6. rag_found + evidence_gap longer than 10 characters -> research_discovery
      7. junior stage + (standard conflict or stat warning) -> methodology_teaching
      8. unsupported_generalization + senior stage          -> exploratory
      9. junior stage, or >= 2 warnings                     -> checklist
     10. otherwise                                          -> hypothesis_critique

    ``None`` is tolerated for ``stage``, ``logic_warnings``, ``venue``,
    ``specific_concern`` and ``evidence_gap`` and is treated as empty.

    Args:
        conflict_type: Conflict label, compared against fixed names.
        stage: Free-text career stage, classified by _is_senior/_is_junior.
        verdict: Review verdict; only the exact value "PASS" is special.
        logic_warnings: Warning strings; their count and content are used.
        venue: Target venue; whitespace-only counts as not given.
        specific_concern: Explicit concern; whitespace-only counts as empty.
        rag_found: Whether retrieval produced supporting material.
        evidence_gap: Evidence-gap description; only its length is checked.

    Returns:
        A ``(format, reason)`` tuple, for example
        ``("journal_fit_review", "시니어 + 목표 학술지(Nature)")`` or
        ``("growth_feedback", "PASS 판정")``.

    Use select_format_simple(...) when only the format string is needed.
    """
    if verdict == "PASS":
        return "growth_feedback", "PASS 판정"

    # Optional fields may arrive as None; normalise them once so that no
    # later rule fails with a TypeError on len() or a regex search.
    warnings = logic_warnings or []
    stage_text = stage or ""
    venue_text = venue or ""
    gap_text = evidence_gap or ""
    warning_count = len(warnings)

    if specific_concern and specific_concern.strip():
        return "concern_advisory", "구체적 우려사항 제기됨"

    if rag_found and conflict_type and warning_count >= 2:
        return "visual_report", f"RAG 근거 + {conflict_type} + 경고 {warning_count}개"

    if (conflict_type == "causation_from_correlation"
            and _has_stat_warning(warnings)):
        return "statistical_review", "인과·상관 혼동 + 통계 경고"

    # A whitespace-only venue is not a real target journal; this mirrors
    # the blank check applied to specific_concern above.
    if venue_text.strip() and _is_senior(stage_text):
        return "journal_fit_review", f"시니어 + 목표 학술지({venue_text})"

    if rag_found and len(gap_text) > 10:
        return "research_discovery", "RAG 근거 + 명확한 근거 공백"

    # Classify the junior stage once; rules 7 and 9 both depend on it.
    is_junior = _is_junior(stage_text)

    if is_junior and (
        conflict_type in STANDARD_CONFLICTS or _has_stat_warning(warnings)
    ):
        return "methodology_teaching", "초급 연구자 + 방법론/통계 경고"

    if conflict_type == "unsupported_generalization" and _is_senior(stage_text):
        return "exploratory", "시니어 + 과대일반화 의심"

    if is_junior or warning_count >= 2:
        reason = "초급 연구자" if is_junior else f"경고 {warning_count}개"
        return "checklist", reason

    return "hypothesis_critique", "기본 경로 (특수 조건 없음)"


# ================================================================
# 하위 호환용
# ================================================================
def select_format_simple(**kwargs) -> str:
    """Return only the format name chosen by select_format.

    Backward-compatible wrapper: the keyword arguments are forwarded
    unchanged to select_format and the reason element of its result is
    discarded.
    """
    return select_format(**kwargs)[0]


def needs_visual(
    selected_format: str,
    logic_warnings:  list,
) -> bool:
    """Report whether an HTML visualization (TYPE_G) should accompany the output.

    Returns True for "journal_fit_review" and "growth_feedback"
    unconditionally, and for "research_discovery" only when there are at
    least two logic warnings. Every other format returns False.
    """
    # research_discovery is the only format whose answer depends on the warnings.
    if selected_format == "research_discovery":
        return len(logic_warnings) >= 2
    return selected_format in ("journal_fit_review", "growth_feedback")