# research-companion-api / format_selector.py
# JinHeon0505's picture
# Upload 20 files
# b20db86 verified
# ================================================================
# 파이브라인 v2 — Format Selector v2.6
# ================================================================
# v2.1 - [Fix-10] stage-matching regex
# v2.2 - [Redesign] 8 TYPE + exploratory + checklist
# v2.6 - [F6] select_format now returns a (format, reason) tuple
#        — so the UI can show why a format was selected.
#        — Backward compatibility: select_format_simple(...) still returns a plain str.
# ================================================================
import re
# ── stage-matching regexes ─────────────────────────────────────────
# Career-stage keywords that mark a user as a senior researcher, in Korean
# (PhD, postdoc, clinical, professor, research staff, researcher) and
# English. Matching is case-insensitive and unanchored, except that "pi"
# must be a whole word so it does not fire inside words such as "pilot".
# The Korean alternatives are written as proper UTF-8 Hangul; a mis-decoded
# (mojibake) spelling of them can never match real Korean input.
SENIOR_PATTERN = re.compile(
    r"박사|포스닥|포스트닥|임상|교수|연구원|연구자|"
    r"phd|postdoc|post[- ]doc|researcher|professor|faculty|"
    r"principal\s*investigator|\bpi\b",
    re.IGNORECASE,
)
# Career-stage keywords that mark a user as a junior researcher, in Korean
# (master's, undergraduate, bachelor's, expected graduation) and English.
# Matching is case-insensitive and unanchored, except that the abbreviations
# "ms" and "bs" must be whole words.
# The Korean alternatives are written as proper UTF-8 Hangul; a mis-decoded
# (mojibake) spelling of them can never match real Korean input.
JUNIOR_PATTERN = re.compile(
    r"석사|학부|학사|졸업예정|"
    r"master|undergraduate|bachelor|\bms\b|\bbs\b|"
    r"grad(uate)?\s*student",
    re.IGNORECASE,
)
# Conflict types that select_format treats as "standard" methodology
# problems: for a junior-stage user they select "methodology_teaching".
STANDARD_CONFLICTS = frozenset({
    "causation_from_correlation",
    "method_contradiction",
})
# Substrings that mark a logic warning as statistics-related. They are
# searched for case-sensitively in the joined warning text by
# _has_stat_warning.
# Korean entries: 회귀 = regression, 상관 = correlation, 내생성 = endogeneity,
# 역인과 = reverse causality, 통계 = statistics, 검정 = (hypothesis) test,
# 분산 = variance, 표본 = sample.
# All Korean entries are proper UTF-8 Hangul; a mis-decoded (mojibake)
# spelling would never match real warning text.
STAT_WARNING_KEYWORDS = [
    "OLS", "회귀", "상관", "p-value", "내생성", "역인과",
    "통계", "검정", "분산", "표본", "Cox", "ANOVA",
    "regression", "correlation", "endogeneity",
]
def _is_senior(stage: str) -> bool:
    """Return True when SENIOR_PATTERN matches anywhere in *stage*."""
    return SENIOR_PATTERN.search(stage) is not None
def _is_junior(stage: str) -> bool:
    """Return True when JUNIOR_PATTERN matches anywhere in *stage*."""
    return JUNIOR_PATTERN.search(stage) is not None
def _has_stat_warning(logic_warnings: list) -> bool:
    """Return True when any statistics keyword occurs in the warnings.

    The warnings are joined with single spaces into one string, and each
    entry of STAT_WARNING_KEYWORDS is looked up in it as a case-sensitive
    substring.
    """
    combined = " ".join(logic_warnings)
    for keyword in STAT_WARNING_KEYWORDS:
        if keyword in combined:
            return True
    return False
# ================================================================
# Main entry point: returns a (format, reason) tuple
# ================================================================
def select_format(
    conflict_type: str,
    stage: str,
    verdict: str,
    logic_warnings: list,
    venue: str = "",
    specific_concern: str = "",
    rag_found: bool = False,
    evidence_gap: str = "",
) -> tuple[str, str]:
    """Choose the output TYPE and a one-line reason for the choice.

    Rules are evaluated top to bottom; the first one that matches wins:

    1. ``verdict == "PASS"``                          -> growth_feedback
    2. non-blank ``specific_concern``                 -> concern_advisory
    3. ``rag_found`` + any ``conflict_type`` + at
       least two warnings                             -> visual_report
    4. ``causation_from_correlation`` + a statistics
       keyword in the warnings                        -> statistical_review
    5. non-blank ``venue`` + senior ``stage``         -> journal_fit_review
    6. ``rag_found`` + ``evidence_gap`` longer than
       10 characters                                  -> research_discovery
    7. junior ``stage`` + (standard conflict or a
       statistics keyword in the warnings)            -> methodology_teaching
    8. ``unsupported_generalization`` + senior stage  -> exploratory
    9. junior ``stage`` or at least two warnings      -> checklist
    10. otherwise                                     -> hypothesis_critique

    Args:
        conflict_type: Conflict label; compared against literal names such
            as "causation_from_correlation".
        stage: Free-text career stage, classified by the senior/junior
            regexes.
        verdict: Review verdict; only the exact value "PASS" is special.
        logic_warnings: Warning strings. ``None`` is treated as no warnings.
        venue: Target venue. Blank or whitespace-only means "not given".
        specific_concern: Concern text. Blank or whitespace-only means
            "not given".
        rag_found: Whether RAG evidence was found.
        evidence_gap: Evidence-gap text; rule 6 requires more than 10
            characters.

    Returns:
        A ``(format, reason)`` tuple. The reason is a Korean string, e.g.
        ``("growth_feedback", "PASS 판정")`` or
        ``("journal_fit_review", "시니어 + 목표 학술지(Nature)")``.
        Use ``select_format_simple(...)`` when only the format is needed.
    """
    if verdict == "PASS":
        return "growth_feedback", "PASS 판정"  # "PASS verdict"

    # A missing warning list behaves like an empty one instead of crashing.
    warnings = logic_warnings or []
    warning_count = len(warnings)

    if specific_concern and specific_concern.strip():
        # "a specific concern was raised"
        return "concern_advisory", "구체적 우려사항 제기됨"

    if rag_found and conflict_type and warning_count >= 2:
        return "visual_report", f"RAG 근거 + {conflict_type} + 경고 {warning_count}개"

    if (conflict_type == "causation_from_correlation"
            and _has_stat_warning(warnings)):
        # "causation/correlation confusion + statistics warning"
        return "statistical_review", "인과·상관 혼동 + 통계 경고"

    # Strip the venue so that a whitespace-only value counts as "not given",
    # matching how specific_concern is handled above.
    target_venue = venue.strip() if venue else ""
    if target_venue and _is_senior(stage):
        # "senior + target journal(<venue>)"
        return "journal_fit_review", f"시니어 + 목표 학술지({target_venue})"

    if rag_found and len(evidence_gap) > 10:
        # "RAG evidence + clear evidence gap"
        return "research_discovery", "RAG 근거 + 명확한 근거 공백"

    # Classified once here; reused by rules 7 and 9.
    junior = _is_junior(stage)

    if junior and (
        conflict_type in STANDARD_CONFLICTS or _has_stat_warning(warnings)
    ):
        # "junior researcher + methodology/statistics warning"
        return "methodology_teaching", "초급 연구자 + 방법론/통계 경고"

    if conflict_type == "unsupported_generalization" and _is_senior(stage):
        # "senior + suspected over-generalization"
        return "exploratory", "시니어 + 과대일반화 의심"

    if junior or warning_count >= 2:
        # "junior researcher" / "<n> warnings"
        reason = "초급 연구자" if junior else f"경고 {warning_count}개"
        return "checklist", reason

    # "default path (no special condition)"
    return "hypothesis_critique", "기본 경로 (특수 조건 없음)"
# ================================================================
# Backward-compatibility helpers
# ================================================================
def select_format_simple(**kwargs) -> str:
    """Return only the format name, as earlier versions did.

    All keyword arguments are forwarded unchanged to select_format; the
    reason element of its result is discarded.
    """
    return select_format(**kwargs)[0]
def needs_visual(
    selected_format: str,
    logic_warnings: list,
) -> bool:
    """Return whether an HTML visualization (TYPE_G) should accompany the output.

    True for "journal_fit_review" and "growth_feedback" unconditionally, and
    for "research_discovery" when there are at least two logic warnings.
    """
    # One short-circuiting expression: logic_warnings is only measured when
    # the format is "research_discovery".
    return selected_format in ("journal_fit_review", "growth_feedback") or (
        selected_format == "research_discovery" and len(logic_warnings) >= 2
    )