|
|
import os |
|
|
from typing import List, Dict, Tuple, Union |
|
|
|
|
|
import gradio as gr |
|
|
from openai import OpenAI |
|
|
|
|
|
|
|
|
import pandas as pd |
|
|
from pypdf import PdfReader |
|
|
from docx import Document as DocxDocument |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_client(key: str) -> OpenAI:
    """Build an OpenAI client from a user-supplied API key.

    Raises:
        gr.Error: if the key is missing or whitespace-only, so Gradio
            surfaces a friendly message instead of a traceback.
    """
    cleaned = (key or "").strip()
    if not cleaned:
        raise gr.Error("Please enter your OpenAI API key.")
    return OpenAI(api_key=cleaned)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def stream_chat(api_key: str, user_input: str, history: List[Dict]):
    """Stream a chat completion back to the Gradio UI.

    Args:
        api_key: user-supplied OpenAI key (validated by get_client).
        user_input: raw textbox content; stripped before use.
        history: prior conversation as a list of message dicts.

    Yields:
        (chatbot_messages, history_state, textbox_update) triples; the
        textbox is cleared on every yield. On API failure the error text
        is appended as an assistant message instead of raising.
    """
    client = get_client(api_key)
    history = history or []
    user_input = (user_input or "").strip()
    if not user_input:
        # BUG FIX: this function is a generator, so a bare
        # `return history, history, gr.update(value="")` never reaches
        # Gradio — the tuple is swallowed as the StopIteration value and
        # the UI receives no update. Yield it instead, then stop.
        yield history, history, gr.update(value="")
        return

    msgs = history + [{"role": "user", "content": user_input}]
    try:
        stream = client.chat.completions.create(
            model="gpt-5",
            messages=msgs,
            stream=True,
        )
        acc = ""
        for chunk in stream:
            # Robustness: some streamed chunks (e.g. trailing usage/metadata
            # frames) can carry an empty `choices` list — skip them rather
            # than dying on an IndexError.
            if not chunk.choices:
                continue
            delta = chunk.choices[0].delta.content or ""
            acc += delta
            yield msgs + [{"role": "assistant", "content": acc}], msgs, gr.update(value="")
        final_hist = msgs + [{"role": "assistant", "content": acc}]
        yield final_hist, final_hist, gr.update(value="")
    except Exception as e:
        # Surface the failure in-channel so the chat stays usable.
        err = f"[Error] {e}"
        final_hist = msgs + [{"role": "assistant", "content": err}]
        yield final_hist, final_hist, gr.update(value="")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Supported upload extensions, grouped by which parser handles each format.
TEXT_EXTS = {".txt", ".md", ".markdown"}  # plain text / markdown -> read_text_file
DOCX_EXTS = {".docx"}  # Word documents -> read_docx
PDF_EXTS = {".pdf"}  # PDFs -> read_pdf
CSV_EXTS = {".csv"}  # tabular data -> summarize_csv (summary text, not raw dump)
|
|
|
|
|
def _ext(path: str) -> str: |
|
|
return os.path.splitext(path.lower())[1] |
|
|
|
|
|
def _coerce_paths(files: List[Union[str, dict, gr.File]]) -> List[str]:
    """Normalize Gradio upload payloads into a flat list of filepath strings.

    Depending on the Gradio version, uploads may arrive as:
    - plain absolute-path strings (type='filepath'),
    - dicts carrying a 'name' key,
    - file-like objects exposing a `.name` attribute.

    Anything that matches none of these shapes is silently dropped.
    """
    normalized: List[str] = []
    for item in files or []:
        if isinstance(item, str):
            normalized.append(item)
            continue
        if isinstance(item, dict) and "name" in item:
            normalized.append(item["name"])
            continue
        maybe_name = getattr(item, "name", None)
        if maybe_name:
            normalized.append(maybe_name)
    return normalized
|
|
|
|
|
def read_text_file(fp: str) -> str:
    """Read a text file, trying UTF-8 first and falling back to Latin-1.

    Latin-1 maps every byte, so the fallback pass cannot raise a decode
    error; some file is always returned (possibly with mojibake).
    """
    for encoding in ("utf-8", "latin-1"):
        try:
            with open(fp, "r", encoding=encoding) as handle:
                return handle.read()
        except UnicodeDecodeError:
            continue
|
|
|
|
|
def read_pdf(fp: str) -> str:
    """Extract and join the text of every page in a PDF.

    Pages with no extractable text contribute an empty line; the final
    result is stripped of surrounding whitespace.
    """
    with open(fp, "rb") as handle:
        reader = PdfReader(handle)
        page_texts = [(page.extract_text() or "") for page in reader.pages]
    return "\n".join(page_texts).strip()
|
|
|
|
|
def read_docx(fp: str) -> str:
    """Join the text of every paragraph in a .docx file, stripped of outer whitespace."""
    document = DocxDocument(fp)
    return "\n".join(paragraph.text for paragraph in document.paragraphs).strip()
|
|
|
|
|
def summarize_csv(fp: str) -> str: |
|
|
|
|
|
read_attempts = [ |
|
|
dict(), |
|
|
dict(sep=";"), |
|
|
dict(sep="\t"), |
|
|
] |
|
|
last_err = None |
|
|
df = None |
|
|
for kwargs in read_attempts: |
|
|
try: |
|
|
df = pd.read_csv(fp, **kwargs) |
|
|
break |
|
|
except Exception as e: |
|
|
last_err = e |
|
|
if df is None: |
|
|
raise gr.Error(f"Could not read CSV: {last_err}") |
|
|
|
|
|
shape_info = f"Rows: {df.shape[0]}, Columns: {df.shape[1]}" |
|
|
cols = ", ".join([f"{c} ({str(df[c].dtype)})" for c in df.columns]) |
|
|
try: |
|
|
desc = df.describe(include="all").transpose().fillna("").to_string() |
|
|
except Exception: |
|
|
desc = "(describe() failed for this CSV)" |
|
|
try: |
|
|
head = df.head(10).to_string(index=False) |
|
|
except Exception: |
|
|
head = "(preview failed)" |
|
|
|
|
|
return ( |
|
|
"CSV SUMMARY\n" |
|
|
f"{shape_info}\n\n" |
|
|
f"COLUMNS & TYPES:\n{cols}\n\n" |
|
|
f"DESCRIBE():\n{desc}\n\n" |
|
|
f"FIRST 10 ROWS:\n{head}\n" |
|
|
) |
|
|
|
|
|
def load_files(files: List[Union[str, dict, gr.File]], progress: gr.Progress) -> Tuple[str, List[str]]:
    """Parse every uploaded file into text.

    Args:
        files: raw Gradio upload payload (normalized via _coerce_paths).
        progress: Gradio progress reporter, updated before/after each file.

    Returns:
        (joined_text, basenames): all parsed texts joined by a separator,
        plus the base filenames in upload order.

    Raises:
        gr.Error: if nothing was uploaded or an extension is unsupported.
    """
    paths = _coerce_paths(files)
    if not paths:
        raise gr.Error("Please upload at least one file (PDF, DOCX, TXT, MD, or CSV).")

    texts: List[str] = []
    names: List[str] = []
    total = max(len(paths), 1)
    for idx, path in enumerate(paths, start=1):
        base = os.path.basename(path)
        names.append(base)
        extension = _ext(path)
        # Report "half done" with this file while it is being parsed.
        progress((idx - 0.5) / total, desc=f"Parsing {base}")
        if extension in TEXT_EXTS:
            parsed = read_text_file(path)
        elif extension in PDF_EXTS:
            parsed = read_pdf(path)
        elif extension in DOCX_EXTS:
            parsed = read_docx(path)
        elif extension in CSV_EXTS:
            parsed = summarize_csv(path)
        else:
            raise gr.Error(f"Unsupported file type: {extension}")
        texts.append(parsed)
        progress(idx / total, desc=f"Parsed {base}")
    return "\n\n-----\n\n".join(texts), names
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def chunk_text(s: str, max_chars: int = 12000) -> List[str]:
    """Split *s* into chunks of at most *max_chars* characters.

    Prefers to cut at a paragraph break ("\\n\\n") when one falls inside
    the window and is more than 2000 chars past the chunk start (so no
    tiny fragments are produced); otherwise cuts hard at the limit.
    The chunks concatenate back to the stripped input.
    """
    s = (s or "").strip()
    if not s:
        return []
    if len(s) <= max_chars:
        return [s]

    pieces: List[str] = []
    pos = 0
    length = len(s)
    while pos < length:
        window_end = min(pos + max_chars, length)
        split_at = s.rfind("\n\n", pos, window_end)
        # No paragraph break found, or it would leave a sliver — hard cut.
        if split_at == -1 or split_at <= pos + 2000:
            split_at = window_end
        pieces.append(s[pos:split_at])
        pos = split_at
    return pieces
|
|
|
|
|
def llm_summarize_chunks(client: OpenAI, chunks: List[str], mode: str, custom_note: str, progress: gr.Progress) -> List[str]:
    """Summarize each chunk independently with one LLM call per chunk.

    Args:
        client: ready OpenAI client.
        chunks: text chunks from chunk_text.
        mode: one of the five output-mode labels; selects the prompt.
        custom_note: optional extra guidance appended to each prompt.
        progress: Gradio progress reporter (mapped into the 0.2-0.8 band).

    Returns:
        One summary string per chunk (empty list for empty input).

    Raises:
        KeyError: if *mode* is not a known mode label.
    """
    total = len(chunks)
    if total == 0:
        return []

    prompts = {
        "Executive Brief": (
            "Create a crisp executive brief with sections: Context, Key Findings, Metrics, Implications, Decisions Needed."
        ),
        "Action Items": (
            "Extract actionable tasks with owners (if available), deadlines (if implied), dependencies, and priority."
        ),
        "Risks & Mitigations": (
            "Identify key risks, likelihood, impact, and concrete mitigations. Include watchpoints and triggers."
        ),
        "Meeting Minutes": (
            "Produce clean, structured minutes: Attendees (if inferable), Agenda, Discussion, Decisions, Action Items."
        ),
        "JSON Summary": (
            "Return a compact JSON with keys: context, findings[], metrics{}, actions[], risks[], decisions[]."
        ),
    }
    mode_prompt = prompts[mode]

    # Loop-invariant system prompt, hoisted out of the per-chunk loop
    # (also avoids shadowing the stdlib name `sys` as the original did).
    system_msg = "You are a senior analyst. Write succinctly; use bullet points where appropriate."

    summaries: List[str] = []
    for idx, chunk in enumerate(chunks, start=1):
        progress(0.2 + 0.6 * (idx - 1) / max(total, 1), desc=f"Summarizing chunk {idx}/{total}")
        guidance = ('Additional guidance: ' + custom_note) if custom_note else ''
        user_msg = f"{mode_prompt}\n\n{guidance}\n\n---\nSOURCE CHUNK {idx}/{total}:\n{chunk}\n"
        resp = client.chat.completions.create(
            model="gpt-5",
            messages=[
                {"role": "system", "content": system_msg},
                {"role": "user", "content": user_msg},
            ],
        )
        summaries.append(resp.choices[0].message.content.strip())
        progress(0.2 + 0.6 * idx / max(total, 1), desc=f"Summarized chunk {idx}/{total}")
    return summaries
|
|
|
|
|
def llm_synthesize_final(client: OpenAI, mode: str, names: List[str], partials: List[str], custom_note: str, progress: gr.Progress) -> str:
    """Merge per-chunk summaries into a single final deliverable.

    Runs one synthesis LLM call over the labeled partial summaries and
    returns the stripped response text. Progress is reported at 0.85
    (start) and 0.98 (finish).
    """
    progress(0.85, desc="Synthesizing final deliverable")
    system_msg = "You are a chief of staff producing board-ready output. Tight, accurate, and well-structured."

    labeled_parts = [f"[PART {idx + 1}]\n{text}" for idx, text in enumerate(partials)]
    corpus = "\n\n---\n\n".join(labeled_parts)
    guidance = ('Additional guidance: ' + custom_note) if custom_note else ''
    user_msg = (
        f"Files analyzed: {', '.join(names)}\n\n"
        f"Mode: {mode}\n"
        f"{guidance}\n\n"
        "Synthesize the PARTS into a single cohesive deliverable. If JSON mode, return only JSON."
        "\n\n---\nCORPUS (SUMMARIES):\n" + corpus
    )
    resp = client.chat.completions.create(
        model="gpt-5",
        messages=[
            {"role": "system", "content": system_msg},
            {"role": "user", "content": user_msg},
        ],
    )
    progress(0.98, desc="Finalizing")
    return resp.choices[0].message.content.strip()
|
|
|
|
|
def pro_brief(api_key: str, files: List[Union[str, dict, gr.File]], mode: str, custom_note: str):
    """End-to-end pipeline: parse uploads, chunk, summarize, synthesize.

    Generator handler for the "Pro Brief" tab: yields running markdown
    status text, then the final deliverable (fenced as JSON when the
    JSON Summary mode is selected).
    """
    progress = gr.Progress(track_tqdm=False)
    client = get_client(api_key)

    progress(0.02, desc="Loading files")
    # BUG FIX: the status strings below were mojibake-corrupted emoji
    # ("π", "β", "π§±", "π§ "); worse, the "Parsed ..." f-string had a raw
    # line break inside the literal — a SyntaxError as written. Restored
    # as valid single-line f-strings with sensible emoji.
    out = "\U0001F4C2 **Loading files...**\n"
    yield out

    raw_text, names = load_files(files, progress)
    out += f"\u2705 Parsed {len(names)} file(s): {', '.join(names)}\n"
    yield out

    progress(0.18, desc="Chunking text")
    chunks = chunk_text(raw_text, max_chars=12000)
    out += f"\U0001F9F1 Created {len(chunks)} chunk(s) for analysis\n"
    yield out

    partials = llm_summarize_chunks(client, chunks, mode, custom_note, progress)
    out += f"\U0001F9E0 Summarized {len(partials)} chunk(s)\n"
    yield out

    final = llm_synthesize_final(client, mode, names, partials, custom_note, progress)

    progress(1.0, desc="Done")
    if mode == "JSON Summary":
        # Fence the JSON so the Markdown component renders it as code.
        yield "```json\n" + final + "\n```"
    else:
        yield final
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# UI wiring: two-tab Gradio app — streaming chat and document "Pro Brief".
# NOTE(review): several UI strings (e.g. "π", "β’", "π¬", "hiβ¦") look like
# mojibake from a bad encoding round-trip — they were presumably emoji /
# typographic characters; confirm the intended text before shipping.
# ---------------------------------------------------------------------------
with gr.Blocks(title="ZEN GPT-5 β’ Production Tools") as demo:
    gr.Markdown("### π Enter your OpenAI API key (not stored)")
    # The key lives only in this session's component state and is passed
    # into every handler call; it is never persisted server-side.
    api_key = gr.Textbox(placeholder="sk-...", type="password", label="OpenAI API Key")

    with gr.Tab("π¬ Chat"):
        # type="messages" makes the Chatbot consume [{"role":..., "content":...}]
        # dicts — the exact shape stream_chat yields.
        chatbox = gr.Chatbot(label="GPT-5 Chat", height=420, type="messages")
        history_state = gr.State([])  # canonical conversation history (message dicts)
        user_in = gr.Textbox(placeholder="Say hiβ¦", label="Message")
        send_btn = gr.Button("Send", variant="primary")
        clear_btn = gr.Button("Clear Chat")

        # Button click and Enter-in-textbox run the same streaming handler;
        # its third output clears the message box on every yield.
        send_btn.click(stream_chat, [api_key, user_in, history_state], [chatbox, history_state, user_in], queue=True)
        user_in.submit(stream_chat, [api_key, user_in, history_state], [chatbox, history_state, user_in], queue=True)
        clear_btn.click(lambda: ([], []), None, [chatbox, history_state])

    with gr.Tab("π Pro Brief (Docs β Executive Output)"):
        gr.Markdown(
            "Upload PDFs, DOCX, TXT, MD, or CSV. Get an **Executive Brief**, **Action Items**, "
            "**Risks & Mitigations**, **Meeting Minutes**, or a **JSON Summary**."
        )
        # type="filepath" delivers absolute path strings; _coerce_paths still
        # normalizes older Gradio payload shapes defensively.
        files = gr.File(label="Upload files", file_count="multiple", type="filepath")
        mode = gr.Radio(
            ["Executive Brief", "Action Items", "Risks & Mitigations", "Meeting Minutes", "JSON Summary"],
            value="Executive Brief",
            label="Output Mode",
        )
        custom = gr.Textbox(label="Optional guidance (tone, audience, focus areas)", lines=3,
                            placeholder="e.g., Board-ready; focus on budget impact and timeline risk.")
        run = gr.Button("Generate Pro Brief", variant="primary")
        out = gr.Markdown(label="Output", show_copy_button=True)

        # pro_brief is a generator, so this Markdown output updates live
        # as each pipeline stage finishes.
        run.click(pro_brief, [api_key, files, mode, custom], out, queue=True)

    gr.HTML(
        "<div style='text-align:right; font-size:12px; opacity:0.55; margin-top:10px;'>"
        "Module 3 β ZEN SDK Production"
        "</div>"
    )

# Queueing is required for generator/streaming handlers; cap pending jobs at 64.
demo.queue(max_size=64).launch()
|
|
|