import os
from typing import List, Dict, Tuple, Union
import gradio as gr
from openai import OpenAI
# Optional parsers
import pandas as pd
from pypdf import PdfReader
from docx import Document as DocxDocument
# ===============================
# Client
# ===============================
def get_client(key: str) -> OpenAI:
    key = (key or "").strip()
    if not key:
        raise gr.Error("Please enter your OpenAI API key.")
    return OpenAI(api_key=key)
# ===============================
# Chat (messages format + streaming)
# ===============================
def stream_chat(api_key: str, user_input: str, history: List[Dict]):
    client = get_client(api_key)
    history = history or []
    user_input = (user_input or "").strip()
    if not user_input:
        # Yield (not return) so Gradio still receives an update for every output.
        yield history, history, gr.update(value="")
        return
    msgs = history + [{"role": "user", "content": user_input}]
    try:
        stream = client.chat.completions.create(
            model="gpt-5",
            messages=msgs,
            stream=True,
        )
        acc = ""
        for chunk in stream:
            if not chunk.choices:  # defensive: some SDK chunks can arrive without choices
                continue
            delta = chunk.choices[0].delta.content or ""
            acc += delta
            yield msgs + [{"role": "assistant", "content": acc}], msgs, gr.update(value="")
        final_hist = msgs + [{"role": "assistant", "content": acc}]
        yield final_hist, final_hist, gr.update(value="")
    except Exception as e:
        err = f"[Error] {e}"
        final_hist = msgs + [{"role": "assistant", "content": err}]
        yield final_hist, final_hist, gr.update(value="")
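# The Chatbot runs in "messages" format, so history entries look like
# (hypothetical example):
#   [{"role": "user", "content": "hi"},
#    {"role": "assistant", "content": "Hello! How can I help?"}]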
# ===============================
# Pro Brief – File ingestion
# ===============================
TEXT_EXTS = {".txt", ".md", ".markdown"}
DOCX_EXTS = {".docx"}
PDF_EXTS = {".pdf"}
CSV_EXTS = {".csv"}
def _ext(path: str) -> str:
    return os.path.splitext(path.lower())[1]
def _coerce_paths(files: List[Union[str, dict, gr.File]]) -> List[str]:
    """
    Gradio may send:
      - list[str] of absolute filepaths (when type='filepath')
      - list[dict] with {'name': '/tmp/..'} in some versions
      - list[gr.File] objects with .name
    Normalize to a list[str] of filepaths.
    """
    paths = []
    for f in files or []:
        if isinstance(f, str):
            paths.append(f)
        elif isinstance(f, dict) and "name" in f:
            paths.append(f["name"])
        else:
            # gr.File or other object exposing .name
            name = getattr(f, "name", None)
            if name:
                paths.append(name)
    return paths
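# For reference, a hypothetical mixed payload normalizes as:
#   _coerce_paths(["/tmp/a.pdf", {"name": "/tmp/b.csv"}]) -> ["/tmp/a.pdf", "/tmp/b.csv"]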
def read_text_file(fp: str) -> str:
    try:
        with open(fp, "r", encoding="utf-8") as f:
            return f.read()
    except UnicodeDecodeError:
        with open(fp, "r", encoding="latin-1") as f:
            return f.read()
def read_pdf(fp: str) -> str:
    text = []
    with open(fp, "rb") as f:
        reader = PdfReader(f)
        for page in reader.pages:
            txt = page.extract_text() or ""
            text.append(txt)
    return "\n".join(text).strip()
def read_docx(fp: str) -> str:
    # doc.paragraphs yields body paragraphs only; text inside tables is skipped
    doc = DocxDocument(fp)
    return "\n".join(p.text for p in doc.paragraphs).strip()
def summarize_csv(fp: str) -> str:
    # Robust CSV read with separator fallbacks
    read_attempts = [
        dict(),
        dict(sep=";"),
        dict(sep="\t"),
    ]
    last_err = None
    df = None
    for kwargs in read_attempts:
        try:
            df = pd.read_csv(fp, **kwargs)
            break
        except Exception as e:
            last_err = e
    if df is None:
        raise gr.Error(f"Could not read CSV: {last_err}")
    shape_info = f"Rows: {df.shape[0]}, Columns: {df.shape[1]}"
    cols = ", ".join(f"{c} ({df[c].dtype})" for c in df.columns)
    try:
        desc = df.describe(include="all").transpose().fillna("").to_string()
    except Exception:
        desc = "(describe() failed for this CSV)"
    try:
        head = df.head(10).to_string(index=False)
    except Exception:
        head = "(preview failed)"
    return (
        "CSV SUMMARY\n"
        f"{shape_info}\n\n"
        f"COLUMNS & TYPES:\n{cols}\n\n"
        f"DESCRIBE():\n{desc}\n\n"
        f"FIRST 10 ROWS:\n{head}\n"
    )
def load_files(files: List[Union[str, dict, gr.File]], progress: gr.Progress) -> Tuple[str, List[str]]:
    paths = _coerce_paths(files)
    if not paths:
        raise gr.Error("Please upload at least one file (PDF, DOCX, TXT, MD, or CSV).")
    texts = []
    names = []
    for i, path in enumerate(paths, start=1):
        names.append(os.path.basename(path))
        ext = _ext(path)
        progress((i - 0.5) / max(len(paths), 1), desc=f"Parsing {os.path.basename(path)}")
        if ext in TEXT_EXTS:
            texts.append(read_text_file(path))
        elif ext in PDF_EXTS:
            texts.append(read_pdf(path))
        elif ext in DOCX_EXTS:
            texts.append(read_docx(path))
        elif ext in CSV_EXTS:
            texts.append(summarize_csv(path))
        else:
            raise gr.Error(f"Unsupported file type: {ext}")
        progress(i / max(len(paths), 1), desc=f"Parsed {os.path.basename(path)}")
    return "\n\n-----\n\n".join(texts), names
# ===============================
# Pro Brief – Chunking & synthesis
# ===============================
def chunk_text(s: str, max_chars: int = 12000) -> List[str]:
    s = (s or "").strip()
    if not s:
        return []
    if len(s) <= max_chars:
        return [s]
    chunks = []
    start = 0
    while start < len(s):
        end = min(start + max_chars, len(s))
        # Prefer to cut at a paragraph break, but not so early that chunks get tiny
        cut = s.rfind("\n\n", start, end)
        if cut == -1 or cut <= start + 2000:
            cut = end
        chunks.append(s[start:cut])
        start = cut
    return chunks
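# Example (hypothetical input): with no paragraph breaks, the default
# max_chars=12000 splits 25,000 chars into chunks of 12000, 12000, and 1000:
#   assert [len(c) for c in chunk_text("a" * 25000)] == [12000, 12000, 1000]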
def llm_summarize_chunks(client: OpenAI, chunks: List[str], mode: str, custom_note: str, progress: gr.Progress) -> List[str]:
    summaries = []
    total = len(chunks)
    if total == 0:
        return summaries
    mode_prompt = {
        "Executive Brief": (
            "Create a crisp executive brief with sections: Context, Key Findings, Metrics, Implications, Decisions Needed."
        ),
        "Action Items": (
            "Extract actionable tasks with owners (if available), deadlines (if implied), dependencies, and priority."
        ),
        "Risks & Mitigations": (
            "Identify key risks, likelihood, impact, and concrete mitigations. Include watchpoints and triggers."
        ),
        "Meeting Minutes": (
            "Produce clean, structured minutes: Attendees (if inferable), Agenda, Discussion, Decisions, Action Items."
        ),
        "JSON Summary": (
            "Return a compact JSON with keys: context, findings[], metrics{}, actions[], risks[], decisions[]."
        ),
    }[mode]
    for i, ch in enumerate(chunks, start=1):
        progress(0.2 + 0.6 * (i - 1) / max(total, 1), desc=f"Summarizing chunk {i}/{total}")
        sys = "You are a senior analyst. Write succinctly; use bullet points where appropriate."
        usr = f"{mode_prompt}\n\n{('Additional guidance: ' + custom_note) if custom_note else ''}\n\n---\nSOURCE CHUNK {i}/{total}:\n{ch}\n"
        resp = client.chat.completions.create(
            model="gpt-5",
            messages=[{"role": "system", "content": sys},
                      {"role": "user", "content": usr}],
        )
        summaries.append(resp.choices[0].message.content.strip())
        progress(0.2 + 0.6 * i / max(total, 1), desc=f"Summarized chunk {i}/{total}")
    return summaries
def llm_synthesize_final(client: OpenAI, mode: str, names: List[str], partials: List[str], custom_note: str, progress: gr.Progress) -> str:
    progress(0.85, desc="Synthesizing final deliverable")
    sys = "You are a chief of staff producing board-ready output. Tight, accurate, and well-structured."
    corpus = "\n\n---\n\n".join(f"[PART {i + 1}]\n{p}" for i, p in enumerate(partials))
    usr = (
        f"Files analyzed: {', '.join(names)}\n\n"
        f"Mode: {mode}\n"
        f"{('Additional guidance: ' + custom_note) if custom_note else ''}\n\n"
        "Synthesize the PARTS into a single cohesive deliverable. If JSON mode, return only JSON."
        "\n\n---\nCORPUS (SUMMARIES):\n" + corpus
    )
    resp = client.chat.completions.create(
        model="gpt-5",
        messages=[{"role": "system", "content": sys},
                  {"role": "user", "content": usr}],
    )
    progress(0.98, desc="Finalizing")
    return resp.choices[0].message.content.strip()
def pro_brief(api_key: str, files: List[Union[str, dict, gr.File]], mode: str, custom_note: str,
              progress: gr.Progress = gr.Progress(track_tqdm=False)):
    # gr.Progress must be a default argument so Gradio injects and tracks it;
    # an instance created inside the body is not wired to the event.
    client = get_client(api_key)
    # Stage 1: Load files
    progress(0.02, desc="Loading files")
    out = "🔎 **Loading files...**\n"
    yield out
    raw_text, names = load_files(files, progress)
    out += f"✅ Parsed {len(names)} file(s): {', '.join(names)}\n"
    yield out
    # Stage 2: Chunk
    progress(0.18, desc="Chunking text")
    chunks = chunk_text(raw_text, max_chars=12000)
    out += f"🧱 Created {len(chunks)} chunk(s) for analysis\n"
    yield out
    # Stage 3: Summarize chunks
    partials = llm_summarize_chunks(client, chunks, mode, custom_note, progress)
    out += f"🧠 Summarized {len(partials)} chunk(s)\n"
    yield out
    # Stage 4: Synthesize final
    final = llm_synthesize_final(client, mode, names, partials, custom_note, progress)
    # Done
    progress(1.0, desc="Done")
    if mode == "JSON Summary":
        yield "```json\n" + final + "\n```"
    else:
        yield final
# ===============================
# UI
# ===============================
with gr.Blocks(title="ZEN GPT-5 • Production Tools") as demo:
    gr.Markdown("### 🔐 Enter your OpenAI API key (not stored)")
    api_key = gr.Textbox(placeholder="sk-...", type="password", label="OpenAI API Key")
    with gr.Tab("💬 Chat"):
        chatbox = gr.Chatbot(label="GPT-5 Chat", height=420, type="messages")
        history_state = gr.State([])
        user_in = gr.Textbox(placeholder="Say hi…", label="Message")
        send_btn = gr.Button("Send", variant="primary")
        clear_btn = gr.Button("Clear Chat")
        send_btn.click(stream_chat, [api_key, user_in, history_state], [chatbox, history_state, user_in], queue=True)
        user_in.submit(stream_chat, [api_key, user_in, history_state], [chatbox, history_state, user_in], queue=True)
        clear_btn.click(lambda: ([], []), None, [chatbox, history_state])
    with gr.Tab("📄 Pro Brief (Docs → Executive Output)"):
        gr.Markdown(
            "Upload PDFs, DOCX, TXT, MD, or CSV. Get an **Executive Brief**, **Action Items**, "
            "**Risks & Mitigations**, **Meeting Minutes**, or a **JSON Summary**."
        )
        files = gr.File(label="Upload files", file_count="multiple", type="filepath")
        mode = gr.Radio(
            ["Executive Brief", "Action Items", "Risks & Mitigations", "Meeting Minutes", "JSON Summary"],
            value="Executive Brief",
            label="Output Mode",
        )
        custom = gr.Textbox(label="Optional guidance (tone, audience, focus areas)", lines=3,
                            placeholder="e.g., Board-ready; focus on budget impact and timeline risk.")
        run = gr.Button("Generate Pro Brief", variant="primary")
        out = gr.Markdown(label="Output", show_copy_button=True)
        # Connect the generator: yields interim status, then the final report
        run.click(pro_brief, [api_key, files, mode, custom], out, queue=True)
    # Subtle program stamp
    gr.HTML(
        "<div style='text-align:right; font-size:12px; opacity:0.55; margin-top:10px;'>"
        "Module 3 – ZEN SDK Production"
        "</div>"
    )
# Enable queuing (progress & concurrency-friendly)
demo.queue(max_size=64).launch()
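# Local run (a sketch; no requirements.txt is shown here): the imports above
# imply `pip install gradio openai pypdf python-docx pandas`, then `python app.py`.
# Gradio serves on http://127.0.0.1:7860 by default.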