# app/main.py
import os
import json
import logging
import asyncio
from typing import Optional

import uvicorn
from fastapi import FastAPI, HTTPException
from fastapi.responses import JSONResponse, StreamingResponse
from pydantic import BaseModel
import torch

from .model import PlutusModel, SummaryModel
from .recommender import Recommender

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("plutus.api")

# Recommender artefacts are resolved under the cache directory (HF_HOME or a local fallback).
_CACHE_DIR = os.getenv("HF_HOME", "/home/user/app")
DEFAULT_RECOMMEND_JSON = os.path.join(_CACHE_DIR, "recommend.json")
RECOMMEND_INDEX_PATH = os.path.join(_CACHE_DIR, "plutus_recommend_index.faiss")
RECOMMEND_META_PATH = os.path.join(_CACHE_DIR, "plutus_recommend_meta.json")


class GenerateCache:
    """In-process cache of the most recent /generate call, reused by /recommend and /summary."""
    last_query: Optional[str] = None
    last_topic: Optional[str] = None
    last_personality: Optional[str] = None
    last_level: Optional[str] = None
    last_output: Optional[str] = None


GEN_CACHE = GenerateCache()

# Load the models and the recommender once at startup.
logger.info("Loading PlutusModel + Recommender...")
plutus_model = PlutusModel()
recommender = Recommender(
    recommend_json_path=DEFAULT_RECOMMEND_JSON,
    index_path=RECOMMEND_INDEX_PATH,
    meta_path=RECOMMEND_META_PATH,
)
summary_model_wrapper = SummaryModel(model_name="Remostart/Plutus_Tutor_model")

app = FastAPI(title="Plutus Learner API")


class GenerateRequest(BaseModel):
    personality: str
    level: str
    topic: str
    query: str
    max_new_tokens: int = 700
    temperature: float = 0.4
    top_p: float = 0.5


class RecommendRequest(BaseModel):
    top_k: int = 5


class SummaryRequest(BaseModel):
    top_k: int = 5


@app.get("/health")
async def health():
    return {"status": "ok", "device": plutus_model.device}


@app.post("/generate")
async def generate(req: GenerateRequest):
    prompt = plutus_model.create_prompt(
        req.personality, req.level, req.topic, req.query
    )
    output_text = plutus_model.generate(
        prompt,
        max_new_tokens=req.max_new_tokens,
        temperature=req.temperature,
        top_p=req.top_p,
    )

    # Cache everything for summary & follow-up queries
    GEN_CACHE.last_query = req.query
    GEN_CACHE.last_topic = req.topic
    GEN_CACHE.last_personality = req.personality
    GEN_CACHE.last_level = req.level
    GEN_CACHE.last_output = output_text

    return {
        "text": output_text,
        "topic": req.topic,
        "query": req.query,
    }


@app.get("/stream_generate")
async def stream_generate(personality: str, level: str, topic: str):
    # The streaming endpoint takes no query; an empty string stands in for it.
    prompt = plutus_model.create_prompt(personality, level, topic, "")

    async def generate_events():
        # Emit each generated chunk as a server-sent event.
        for chunk in plutus_model.stream_generate(prompt):
            yield f"data: {json.dumps({'chunk': chunk})}\n\n"
            await asyncio.sleep(0.01)

    return StreamingResponse(generate_events(), media_type="text/event-stream")


@app.post("/recommend")
async def recommend(req: RecommendRequest):
    if GEN_CACHE.last_query is None:
        raise HTTPException(400, "No query found. Call /generate first.")

    results = recommender.recommend_for_query(
        query=GEN_CACHE.last_query,
        top_k=req.top_k,
        topic_boost=GEN_CACHE.last_topic,
    )

    cleaned = [
        {"topic": r["topic"], "type": r["type"], "url": r["url"]}
        for r in results
    ]
    return {"query": GEN_CACHE.last_query, "results": cleaned}


@app.post("/summary")
async def summary(req: SummaryRequest):
    if GEN_CACHE.last_output is None:
        raise HTTPException(400, "No generate output found. Call /generate first.")

    # Get recommended resources
    recs = recommender.recommend_for_query(
        query=GEN_CACHE.last_query,
        top_k=req.top_k,
        topic_boost=GEN_CACHE.last_topic,
    )

    pretty_recs = []
    for r in recs:
        pretty_recs.append(f"- ({r['type']}) {r['url']}")
    formatted_resources_for_llm = "\n".join(pretty_recs)

    readable_resource_block = f"""
Here are some helpful resources for further learning:

{formatted_resources_for_llm}
"""

    full_input_text = f"""
Summarize the explanation below in a clear, simple, structured way.

Your summary must include:
1. A clean explanation of the topic
2. A study roadmap
3. A friendly explanation of the recommended learning resources (videos, docs)

---- MAIN CONTENT ----
{GEN_CACHE.last_output}

---- RECOMMENDED RESOURCES ----
{readable_resource_block}
"""

    # Pass the recommendations to the summarizer as well via recommended=recs.
    summary_text = summary_model_wrapper.summarize_text(
        full_input_text,
        topic=GEN_CACHE.last_topic,
        level=GEN_CACHE.last_level,
        recommended=recs,
        max_new_tokens=300,
    )

    cleaned_resources = [
        {"type": r["type"], "url": r["url"]}
        for r in recs
    ]

    return {
        "topic": GEN_CACHE.last_topic,
        "summary": summary_text,
        "resources": cleaned_resources,
    }


@app.post("/admin/build_index")
async def build_index(force: bool = False):
    recommender.build_index(force=force)
    return {"indexed": len(recommender.meta)}


if __name__ == "__main__":
    uvicorn.run("app.main:app", host="0.0.0.0", port=7860)