|
|
import os |
|
|
from typing import List, Dict, Tuple, Union |
|
|
|
|
|
import gradio as gr |
|
|
from openai import OpenAI |
|
|
|
|
|
|
|
|
import pandas as pd |
|
|
from pypdf import PdfReader |
|
|
from docx import Document as DocxDocument |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_client(key: str) -> OpenAI:
    """Build an OpenAI client from a user-supplied API key.

    Raises:
        gr.Error: if the key is missing or whitespace-only, so Gradio
            surfaces a friendly message instead of a traceback.
    """
    cleaned = (key or "").strip()
    if not cleaned:
        raise gr.Error("Please enter your OpenAI API key.")
    return OpenAI(api_key=cleaned)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def stream_chat(api_key: str, user_input: str, history: List[Dict]):
    """Stream a chat completion back to the Gradio UI.

    Args:
        api_key: user-supplied OpenAI key (validated by get_client).
        user_input: raw textbox content; stripped before use.
        history: prior conversation as a list of message dicts.

    Yields:
        (chatbot_messages, history_state, textbox_update) triples; the
        textbox is cleared on every yield. On API failure the error text
        is appended as an assistant message instead of raising.
    """
    client = get_client(api_key)
    history = history or []
    user_input = (user_input or "").strip()
    if not user_input:
        # BUG FIX: this function is a generator, so a bare
        # `return history, history, gr.update(value="")` never reaches
        # Gradio — the tuple is swallowed as the StopIteration value and
        # the UI receives no update. Yield it instead, then stop.
        yield history, history, gr.update(value="")
        return

    msgs = history + [{"role": "user", "content": user_input}]
    try:
        stream = client.chat.completions.create(
            model="gpt-5",
            messages=msgs,
            stream=True,
        )
        acc = ""
        for chunk in stream:
            # Robustness: some streamed chunks (e.g. trailing usage/metadata
            # frames) can carry an empty `choices` list — skip them rather
            # than dying on an IndexError.
            if not chunk.choices:
                continue
            delta = chunk.choices[0].delta.content or ""
            acc += delta
            yield msgs + [{"role": "assistant", "content": acc}], msgs, gr.update(value="")
        final_hist = msgs + [{"role": "assistant", "content": acc}]
        yield final_hist, final_hist, gr.update(value="")
    except Exception as e:
        # Surface the failure in-channel so the chat stays usable.
        err = f"[Error] {e}"
        final_hist = msgs + [{"role": "assistant", "content": err}]
        yield final_hist, final_hist, gr.update(value="")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Supported upload extensions, grouped by which parser handles each format.
TEXT_EXTS = {".txt", ".md", ".markdown"}  # plain text / markdown -> read_text_file
DOCX_EXTS = {".docx"}  # Word documents -> read_docx
PDF_EXTS = {".pdf"}  # PDFs -> read_pdf
CSV_EXTS = {".csv"}  # tabular data -> summarize_csv (summary text, not raw dump)
|
|
|
|
|
def _ext(path: str) -> str: |
|
|
return os.path.splitext(path.lower())[1] |
|
|
|
|
|
def _coerce_paths(files: List[Union[str, dict, gr.File]]) -> List[str]:
    """Normalize Gradio upload payloads into a flat list of filepath strings.

    Depending on the Gradio version, uploads may arrive as:
    - plain absolute-path strings (type='filepath'),
    - dicts carrying a 'name' key,
    - file-like objects exposing a `.name` attribute.

    Anything that matches none of these shapes is silently dropped.
    """
    normalized: List[str] = []
    for item in files or []:
        if isinstance(item, str):
            normalized.append(item)
            continue
        if isinstance(item, dict) and "name" in item:
            normalized.append(item["name"])
            continue
        maybe_name = getattr(item, "name", None)
        if maybe_name:
            normalized.append(maybe_name)
    return normalized
|
|
|
|
|
def read_text_file(fp: str) -> str:
    """Read a text file, trying UTF-8 first and falling back to Latin-1.

    Latin-1 maps every byte, so the fallback pass cannot raise a decode
    error; some file is always returned (possibly with mojibake).
    """
    for encoding in ("utf-8", "latin-1"):
        try:
            with open(fp, "r", encoding=encoding) as handle:
                return handle.read()
        except UnicodeDecodeError:
            continue
|
|
|
|
|
def read_pdf(fp: str) -> str:
    """Extract and join the text of every page in a PDF.

    Pages with no extractable text contribute an empty line; the final
    result is stripped of surrounding whitespace.
    """
    with open(fp, "rb") as handle:
        reader = PdfReader(handle)
        page_texts = [(page.extract_text() or "") for page in reader.pages]
    return "\n".join(page_texts).strip()
|
|
|
|
|
def read_docx(fp: str) -> str:
    """Join the text of every paragraph in a .docx file, stripped of outer whitespace."""
    document = DocxDocument(fp)
    return "\n".join(paragraph.text for paragraph in document.paragraphs).strip()
|
|
|
|
|
def summarize_csv(fp: str) -> str: |
|
|
|
|
|
read_attempts = [ |
|
|
dict(), |
|
|
dict(sep=";"), |
|
|
dict(sep="\t"), |
|
|
] |
|
|
last_err = None |
|
|
df = None |
|
|
for kwargs in read_attempts: |
|
|
try: |
|
|
df = pd.read_csv(fp, **kwargs) |
|
|
break |
|
|
except Exception as e: |
|
|
last_err = e |
|
|
if df is None: |
|
|
raise gr.Error(f"Could not read CSV: {last_err}") |
|
|
|
|
|
shape_info = f"Rows: {df.shape[0]}, Columns: {df.shape[1]}" |
|
|
cols = ", ".join([f"{c} ({str(df[c].dtype)})" for c in df.columns]) |
|
|
try: |
|
|
desc = df.describe(include="all").transpose().fillna("").to_string() |
|
|
except Exception: |
|
|
desc = "(describe() failed for this CSV)" |
|
|
try: |
|
|
head = df.head(10).to_string(index=False) |
|
|
except Exception: |
|
|
head = "(preview failed)" |
|
|
|
|
|
return ( |
|
|
"CSV SUMMARY\n" |
|
|
f"{shape_info}\n\n" |
|
|
f"COLUMNS & TYPES:\n{cols}\n\n" |
|
|
f"DESCRIBE():\n{desc}\n\n" |
|
|
f"FIRST 10 ROWS:\n{head}\n" |
|
|
) |
|
|
|
|
|
def load_files(files: List[Union[str, dict, gr.File]], progress: gr.Progress) -> Tuple[str, List[str]]:
    """Parse every uploaded file into text.

    Args:
        files: raw Gradio upload payload (normalized via _coerce_paths).
        progress: Gradio progress reporter, updated before/after each file.

    Returns:
        (joined_text, basenames): all parsed texts joined by a separator,
        plus the base filenames in upload order.

    Raises:
        gr.Error: if nothing was uploaded or an extension is unsupported.
    """
    paths = _coerce_paths(files)
    if not paths:
        raise gr.Error("Please upload at least one file (PDF, DOCX, TXT, MD, or CSV).")

    texts: List[str] = []
    names: List[str] = []
    total = max(len(paths), 1)
    for idx, path in enumerate(paths, start=1):
        base = os.path.basename(path)
        names.append(base)
        extension = _ext(path)
        # Report "half done" with this file while it is being parsed.
        progress((idx - 0.5) / total, desc=f"Parsing {base}")
        if extension in TEXT_EXTS:
            parsed = read_text_file(path)
        elif extension in PDF_EXTS:
            parsed = read_pdf(path)
        elif extension in DOCX_EXTS:
            parsed = read_docx(path)
        elif extension in CSV_EXTS:
            parsed = summarize_csv(path)
        else:
            raise gr.Error(f"Unsupported file type: {extension}")
        texts.append(parsed)
        progress(idx / total, desc=f"Parsed {base}")
    return "\n\n-----\n\n".join(texts), names
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def chunk_text(s: str, max_chars: int = 12000) -> List[str]:
    """Split *s* into chunks of at most *max_chars* characters.

    Prefers to cut at a paragraph break ("\\n\\n") when one falls inside
    the window and is more than 2000 chars past the chunk start (so no
    tiny fragments are produced); otherwise cuts hard at the limit.
    The chunks concatenate back to the stripped input.
    """
    s = (s or "").strip()
    if not s:
        return []
    if len(s) <= max_chars:
        return [s]

    pieces: List[str] = []
    pos = 0
    length = len(s)
    while pos < length:
        window_end = min(pos + max_chars, length)
        split_at = s.rfind("\n\n", pos, window_end)
        # No paragraph break found, or it would leave a sliver — hard cut.
        if split_at == -1 or split_at <= pos + 2000:
            split_at = window_end
        pieces.append(s[pos:split_at])
        pos = split_at
    return pieces
|
|
|
|
|
def llm_summarize_chunks(client: OpenAI, chunks: List[str], mode: str, custom_note: str, progress: gr.Progress) -> List[str]:
    """Summarize each chunk independently with one LLM call per chunk.

    Args:
        client: ready OpenAI client.
        chunks: text chunks from chunk_text.
        mode: one of the five output-mode labels; selects the prompt.
        custom_note: optional extra guidance appended to each prompt.
        progress: Gradio progress reporter (mapped into the 0.2-0.8 band).

    Returns:
        One summary string per chunk (empty list for empty input).

    Raises:
        KeyError: if *mode* is not a known mode label.
    """
    total = len(chunks)
    if total == 0:
        return []

    prompts = {
        "Executive Brief": (
            "Create a crisp executive brief with sections: Context, Key Findings, Metrics, Implications, Decisions Needed."
        ),
        "Action Items": (
            "Extract actionable tasks with owners (if available), deadlines (if implied), dependencies, and priority."
        ),
        "Risks & Mitigations": (
            "Identify key risks, likelihood, impact, and concrete mitigations. Include watchpoints and triggers."
        ),
        "Meeting Minutes": (
            "Produce clean, structured minutes: Attendees (if inferable), Agenda, Discussion, Decisions, Action Items."
        ),
        "JSON Summary": (
            "Return a compact JSON with keys: context, findings[], metrics{}, actions[], risks[], decisions[]."
        ),
    }
    mode_prompt = prompts[mode]

    # Loop-invariant system prompt, hoisted out of the per-chunk loop
    # (also avoids shadowing the stdlib name `sys` as the original did).
    system_msg = "You are a senior analyst. Write succinctly; use bullet points where appropriate."

    summaries: List[str] = []
    for idx, chunk in enumerate(chunks, start=1):
        progress(0.2 + 0.6 * (idx - 1) / max(total, 1), desc=f"Summarizing chunk {idx}/{total}")
        guidance = ('Additional guidance: ' + custom_note) if custom_note else ''
        user_msg = f"{mode_prompt}\n\n{guidance}\n\n---\nSOURCE CHUNK {idx}/{total}:\n{chunk}\n"
        resp = client.chat.completions.create(
            model="gpt-5",
            messages=[
                {"role": "system", "content": system_msg},
                {"role": "user", "content": user_msg},
            ],
        )
        summaries.append(resp.choices[0].message.content.strip())
        progress(0.2 + 0.6 * idx / max(total, 1), desc=f"Summarized chunk {idx}/{total}")
    return summaries
|
|
|
|
|
def llm_synthesize_final(client: OpenAI, mode: str, names: List[str], partials: List[str], custom_note: str, progress: gr.Progress) -> str:
    """Merge per-chunk summaries into a single final deliverable.

    Runs one synthesis LLM call over the labeled partial summaries and
    returns the stripped response text. Progress is reported at 0.85
    (start) and 0.98 (finish).
    """
    progress(0.85, desc="Synthesizing final deliverable")
    system_msg = "You are a chief of staff producing board-ready output. Tight, accurate, and well-structured."

    labeled_parts = [f"[PART {idx + 1}]\n{text}" for idx, text in enumerate(partials)]
    corpus = "\n\n---\n\n".join(labeled_parts)
    guidance = ('Additional guidance: ' + custom_note) if custom_note else ''
    user_msg = (
        f"Files analyzed: {', '.join(names)}\n\n"
        f"Mode: {mode}\n"
        f"{guidance}\n\n"
        "Synthesize the PARTS into a single cohesive deliverable. If JSON mode, return only JSON."
        "\n\n---\nCORPUS (SUMMARIES):\n" + corpus
    )
    resp = client.chat.completions.create(
        model="gpt-5",
        messages=[
            {"role": "system", "content": system_msg},
            {"role": "user", "content": user_msg},
        ],
    )
    progress(0.98, desc="Finalizing")
    return resp.choices[0].message.content.strip()
|
|
|
|
|
def pro_brief(api_key: str, files: List[Union[str, dict, gr.File]], mode: str, custom_note: str):
    """End-to-end pipeline: parse uploads, chunk, summarize, synthesize.

    Generator handler for the "Pro Brief" tab: yields running markdown
    status text, then the final deliverable (fenced as JSON when the
    JSON Summary mode is selected).
    """
    progress = gr.Progress(track_tqdm=False)
    client = get_client(api_key)

    progress(0.02, desc="Loading files")
    # BUG FIX: the status strings below were mojibake-corrupted emoji
    # ("π", "β", "π§±", "π§ "); worse, the "Parsed ..." f-string had a raw
    # line break inside the literal — a SyntaxError as written. Restored
    # as valid single-line f-strings with sensible emoji.
    out = "\U0001F4C2 **Loading files...**\n"
    yield out

    raw_text, names = load_files(files, progress)
    out += f"\u2705 Parsed {len(names)} file(s): {', '.join(names)}\n"
    yield out

    progress(0.18, desc="Chunking text")
    chunks = chunk_text(raw_text, max_chars=12000)
    out += f"\U0001F9F1 Created {len(chunks)} chunk(s) for analysis\n"
    yield out

    partials = llm_summarize_chunks(client, chunks, mode, custom_note, progress)
    out += f"\U0001F9E0 Summarized {len(partials)} chunk(s)\n"
    yield out

    final = llm_synthesize_final(client, mode, names, partials, custom_note, progress)

    progress(1.0, desc="Done")
    if mode == "JSON Summary":
        # Fence the JSON so the Markdown component renders it as code.
        yield "```json\n" + final + "\n```"
    else:
        yield final
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# UI wiring: two-tab Gradio app — streaming chat and document "Pro Brief".
# NOTE(review): several UI strings (e.g. "π", "β’", "π¬", "hiβ¦") look like
# mojibake from a bad encoding round-trip — they were presumably emoji /
# typographic characters; confirm the intended text before shipping.
# ---------------------------------------------------------------------------
with gr.Blocks(title="ZEN GPT-5 β’ Production Tools") as demo:
    gr.Markdown("### π Enter your OpenAI API key (not stored)")
    # The key lives only in this session's component state and is passed
    # into every handler call; it is never persisted server-side.
    api_key = gr.Textbox(placeholder="sk-...", type="password", label="OpenAI API Key")

    with gr.Tab("π¬ Chat"):
        # type="messages" makes the Chatbot consume [{"role":..., "content":...}]
        # dicts — the exact shape stream_chat yields.
        chatbox = gr.Chatbot(label="GPT-5 Chat", height=420, type="messages")
        history_state = gr.State([])  # canonical conversation history (message dicts)
        user_in = gr.Textbox(placeholder="Say hiβ¦", label="Message")
        send_btn = gr.Button("Send", variant="primary")
        clear_btn = gr.Button("Clear Chat")

        # Button click and Enter-in-textbox run the same streaming handler;
        # its third output clears the message box on every yield.
        send_btn.click(stream_chat, [api_key, user_in, history_state], [chatbox, history_state, user_in], queue=True)
        user_in.submit(stream_chat, [api_key, user_in, history_state], [chatbox, history_state, user_in], queue=True)
        clear_btn.click(lambda: ([], []), None, [chatbox, history_state])

    with gr.Tab("π Pro Brief (Docs β Executive Output)"):
        gr.Markdown(
            "Upload PDFs, DOCX, TXT, MD, or CSV. Get an **Executive Brief**, **Action Items**, "
            "**Risks & Mitigations**, **Meeting Minutes**, or a **JSON Summary**."
        )
        # type="filepath" delivers absolute path strings; _coerce_paths still
        # normalizes older Gradio payload shapes defensively.
        files = gr.File(label="Upload files", file_count="multiple", type="filepath")
        mode = gr.Radio(
            ["Executive Brief", "Action Items", "Risks & Mitigations", "Meeting Minutes", "JSON Summary"],
            value="Executive Brief",
            label="Output Mode",
        )
        custom = gr.Textbox(label="Optional guidance (tone, audience, focus areas)", lines=3,
                            placeholder="e.g., Board-ready; focus on budget impact and timeline risk.")
        run = gr.Button("Generate Pro Brief", variant="primary")
        out = gr.Markdown(label="Output", show_copy_button=True)

        # pro_brief is a generator, so this Markdown output updates live
        # as each pipeline stage finishes.
        run.click(pro_brief, [api_key, files, mode, custom], out, queue=True)

    gr.HTML(
        "<div style='text-align:right; font-size:12px; opacity:0.55; margin-top:10px;'>"
        "Module 3 β ZEN SDK Production"
        "</div>"
    )

# Queueing is required for generator/streaming handlers; cap pending jobs at 64.
demo.queue(max_size=64).launch()
|
|
|