| import base64 |
| import html |
| import mimetypes |
| import os |
| from pathlib import Path |
| from typing import Any, Dict, List |
|
|
| import gradio as gr |
| from openai import OpenAI |
|
|
# Runtime configuration, read once from the environment at import time.
# BASE_URL and the API key fall back to empty strings when unset.
DEFAULT_MODEL = os.environ.get("DEFAULT_MODEL", "ERNIE-4.5-VL-28B-A3B-Thinking")
BASE_URL = os.environ.get("BASE_URL", "")
api_key = os.environ.get("ERNIE_API_KEY", "")
|
|
|
|
# Stylesheet handed to gr.Blocks(css=...) in build_demo. It styles the page
# background, the #ernie-hero / #examples-panel / #examples-grid / #chat-wrapper
# containers, and the .ernie-* section classes emitted by _format_sections.
# The trailing @media block overrides the same selectors for dark mode.
CUSTOM_CSS = """
body {
background: radial-gradient(circle at top, #fdfbff 0%, #e7ecf7 45%, #dfe6f5 100%);
font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Arial, sans-serif;
color: #0f172a;
}
.gradio-container {
max-width: 1200px !important;
margin: 0 auto;
}
#ernie-hero {
padding: 12px 0 4px;
}
#ernie-hero h1 {
font-size: 1.85rem;
margin-bottom: 0;
font-weight: 500;
}
#model-link {
margin-top: 6px;
font-size: 0.95rem;
}
#model-link a {
color: #4c1d95;
text-decoration: none;
font-weight: 500;
}
#model-link a:hover {
text-decoration: underline;
}
#examples-panel {
margin-top: 20px;
padding: 18px 22px;
border-radius: 18px;
border: 1px solid rgba(15, 23, 42, 0.12);
background: rgba(255, 255, 255, 0.92);
box-shadow: 0 15px 35px rgba(15, 23, 42, 0.08);
gap: 18px;
}
#examples-panel h4 {
margin: 0 0 8px;
font-size: 1.1rem;
font-weight: 500;
}
#examples-panel p {
margin: 0;
color: rgba(15, 23, 42, 0.7);
font-size: 0.95rem;
}
#examples-grid table {
width: 100%;
}
#examples-grid table tbody {
display: grid;
grid-template-columns: repeat(auto-fit, minmax(220px, 1fr));
gap: 12px;
}
#examples-grid table tr {
display: block;
background: #f7f9ff;
border-radius: 14px;
border: 1px solid rgba(15, 23, 42, 0.08);
padding: 14px;
box-shadow: 0 10px 28px rgba(15, 23, 42, 0.08);
}
#examples-grid table td {
display: block;
padding: 0;
}
#chat-wrapper {
margin-top: 32px;
border-radius: 24px;
padding: 18px;
background: rgba(255, 255, 255, 0.95);
border: 1px solid rgba(15, 23, 42, 0.1);
box-shadow: 0 25px 60px rgba(15, 23, 42, 0.12);
}
.ernie-section {
border-radius: 18px;
margin-bottom: 14px;
padding: 16px 18px;
border: 1px solid rgba(15, 23, 42, 0.1);
background: rgba(255, 255, 255, 0.95);
box-shadow: 0 10px 24px rgba(15, 23, 42, 0.08);
}
.ernie-section-header {
font-size: 0.85rem;
text-transform: uppercase;
letter-spacing: 0.08em;
font-weight: 600;
color: rgba(15, 23, 42, 0.65);
display: flex;
align-items: center;
gap: 6px;
}
.ernie-section-body {
margin-top: 10px;
font-size: 1rem;
color: rgba(15, 23, 42, 0.92);
white-space: pre-wrap;
line-height: 1.55;
}
.ernie-thinking {
border-color: rgba(79, 70, 229, 0.35);
background: rgba(129, 140, 248, 0.08);
}
.ernie-answer {
border-color: rgba(16, 185, 129, 0.35);
background: rgba(110, 231, 183, 0.08);
}

@media (prefers-color-scheme: dark) {
body {
background: radial-gradient(circle at top, #1f264b 0%, #0f172a 45%, #040713 100%);
color: #ecf2ff;
}
#model-link a {
color: #a5b4fc;
}
#examples-panel {
border: 1px solid rgba(255, 255, 255, 0.05);
background: rgba(8, 13, 30, 0.85);
box-shadow: 0 15px 45px rgba(3, 7, 18, 0.55);
}
#examples-panel p {
color: rgba(236, 242, 255, 0.75);
}
#examples-grid table tr {
background: rgba(15, 23, 42, 0.7);
border: 1px solid rgba(255, 255, 255, 0.04);
box-shadow: 0 10px 30px rgba(4, 6, 15, 0.45);
}
#chat-wrapper {
background: rgba(2, 6, 23, 0.78);
border: 1px solid rgba(99, 102, 241, 0.25);
box-shadow: 0 25px 70px rgba(2, 6, 23, 0.7);
}
.ernie-section {
border: 1px solid rgba(255, 255, 255, 0.08);
background: rgba(15, 23, 42, 0.85);
box-shadow: 0 10px 30px rgba(2, 6, 23, 0.55);
}
.ernie-section-header {
color: rgba(236, 242, 255, 0.75);
}
.ernie-section-body {
color: rgba(248, 250, 255, 0.95);
}
.ernie-answer {
border-color: rgba(45, 212, 191, 0.45);
background: rgba(8, 47, 56, 0.65);
}
.ernie-thinking {
border-color: rgba(165, 180, 252, 0.4);
background: rgba(30, 27, 75, 0.65);
}
}
"""
|
|
# Single module-wide OpenAI-compatible client, configured from BASE_URL and
# api_key above and reused by every request made in stream_response.
_client = OpenAI(api_key=api_key, base_url=BASE_URL)
|
|
def _data_url(path: str) -> str:
    """Return the file at ``path`` encoded as a base64 ``data:`` URL.

    The MIME type is guessed from the file name; when it cannot be guessed,
    ``application/octet-stream`` is used. The whole file is read into memory.
    """
    guessed_type = mimetypes.guess_type(path)[0]
    if not guessed_type:
        guessed_type = "application/octet-stream"
    raw_bytes = Path(path).read_bytes()
    payload = base64.b64encode(raw_bytes).decode("utf-8")
    return f"data:{guessed_type};base64,{payload}"
|
|
def _media_content(path: str) -> Dict[str, Any]:
    """Build an image or video content part for the file at ``path``.

    A file whose guessed MIME type starts with ``video`` becomes a
    ``video_url`` part; anything else, including files of unknown type, is
    sent as an ``image_url`` part. The file itself is inlined as a data URL.
    """
    guessed_type = mimetypes.guess_type(path)[0]
    is_video = bool(guessed_type) and guessed_type.startswith("video")
    part_key = "video_url" if is_video else "image_url"
    return {"type": part_key, part_key: {"url": _data_url(path)}}
|
|
def _text_content(text: str) -> Dict[str, Any]:
    """Wrap ``text`` as a ``text`` content part of a chat message."""
    return dict(type="text", text=text)
|
|
def _message(role: str, content: Any) -> Dict[str, Any]:
    """Pair ``role`` with ``content`` in the chat-message dict shape."""
    return dict(role=role, content=content)
|
|
| def _format_sections(thinking: str, answer: str | None = None) -> str: |
| """Render Thinking/Answer blocks with HTML so the chatbot can style them.""" |
| def _build_block(kind: str, label: str, text: str, icon: str) -> str: |
| text = (text or "").strip() |
| if not text: |
| return "" |
| escaped = html.escape(text) |
| return ( |
| f'<div class="ernie-section ernie-{kind}">' |
| f'<div class="ernie-section-header">{icon} {label}</div>' |
| f'<div class="ernie-section-body">{escaped}</div>' |
| "</div>" |
| ) |
|
|
| sections = [ |
| _build_block("thinking", "Thinking", thinking, "🧠"), |
| _build_block("answer", "Answer", answer, "✨") if answer is not None else "", |
| ] |
| rendered = "".join(section for section in sections if section) |
| return rendered |
|
|
def _build_user_message(message: Dict[str, Any]) -> Dict[str, Any]:
    """Convert a multimodal textbox payload into one ``user`` chat message.

    ``message`` is read through its ``"files"`` and ``"text"`` keys. Every
    file becomes a media part (in order), followed by a single text part when
    the stripped text is non-empty.
    """
    parts: List[Dict[str, Any]] = []
    for file_path in message.get("files") or []:
        parts.append(_media_content(file_path))

    prompt = (message.get("text") or "").strip()
    if prompt:
        parts.append(_text_content(prompt))

    return _message("user", parts)
|
|
def _extract_answer_text(content: Any) -> str:
    """Return only the final-answer text of a stored assistant message.

    Assistant turns are stored as the HTML produced by ``_format_sections``,
    so the answer is taken from the ``ernie-answer`` section and unescaped
    back to plain text. The older plain-text ``"Answer:\\n"`` marker is still
    honoured, and any other string is returned stripped. Non-string content
    yields an empty string instead of raising.
    """
    if not isinstance(content, str):
        return ""

    answer_open = '<div class="ernie-section ernie-answer">'
    body_open = '<div class="ernie-section-body">'
    _, has_answer, after_answer = content.partition(answer_open)
    if has_answer:
        _, has_body, body = after_answer.partition(body_open)
        if has_body:
            # The body was html.escape()d when rendered, so it holds no
            # literal "<": the first closing tag is the end of the body.
            escaped = body.partition("</div>")[0]
            return html.unescape(escaped).strip()

    if "Answer:\n" in content:
        return content.split("Answer:\n", 1)[1].strip()
    return content.strip()


def _convert_history(history: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Convert chatbot history into chat-completion messages.

    Consecutive ``user`` entries (string text and tuples of file paths) are
    merged into one multi-part user message, which is emitted just before the
    next ``assistant`` entry. Assistant entries are reduced to their answer
    text, so rendered markup and the thinking section are not sent back to
    the model.

    Args:
        history: Previous turns as ``{"role", "content"}`` dicts; ``None`` or
            an empty list gives an empty result.

    Returns:
        Messages whose ``content`` is a list of content parts.

    Note:
        User entries that are not followed by an assistant entry are not
        emitted, and user content that is neither ``str`` nor ``tuple`` is
        ignored.
    """
    msgs: List[Dict[str, Any]] = []
    pending_user: List[Dict[str, Any]] = []

    for turn in history or []:
        role, content = turn.get("role"), turn.get("content")
        if role == "user":
            if isinstance(content, str):
                pending_user.append(_text_content(content))
            elif isinstance(content, tuple):
                pending_user.extend(_media_content(path) for path in content if path)
        elif role == "assistant":
            if pending_user:
                msgs.append(_message("user", pending_user))
                # Rebind rather than clear: the emitted message keeps its list.
                pending_user = []
            answer_only = _extract_answer_text(content)
            msgs.append(_message("assistant", [_text_content(answer_only)]))

    return msgs
|
|
|
|
def stream_response(message: Dict[str, Any], history: List[Dict[str, Any]], model_name: str = DEFAULT_MODEL):
    """Stream the model's reply as progressively longer rendered HTML.

    The prior ``history`` plus the new ``message`` are sent as one streaming
    chat-completion request. After every chunk the accumulated reasoning and
    answer text are re-rendered with ``_format_sections`` and yielded, so each
    yielded value is the full message so far, not a delta.

    Args:
        message: New user input with ``"text"`` and ``"files"`` keys.
        history: Earlier turns as ``{"role", "content"}`` dicts.
        model_name: Accepted for interface compatibility only.
            NOTE(review): it is not forwarded; the request is always sent
            with ``model="default"`` -- confirm this is intended.

    Yields:
        The rendered HTML so far, or a single ``"Failed to get response: ..."``
        string if building the request or reading the stream raises.
    """
    messages = _convert_history(history)
    messages.append(_build_user_message(message))

    try:
        stream = _client.chat.completions.create(
            model="default",
            messages=messages,
            stream=True,
        )
        thinking_parts: List[str] = []
        answer_parts: List[str] = []

        for chunk in stream:
            # Some chunks (e.g. usage-only ones) carry an empty ``choices``
            # list; indexing it would raise and replace the reply streamed so
            # far with an error message.
            if not chunk.choices:
                continue
            delta = chunk.choices[0].delta

            reasoning_piece = getattr(delta, "reasoning_content", None)
            if reasoning_piece:
                thinking_parts.append(reasoning_piece)

            answer_piece = getattr(delta, "content", None)
            if answer_piece:
                answer_parts.append(answer_piece)

            # The Answer section is passed only once answer text has arrived.
            rendered = _format_sections(
                "".join(thinking_parts),
                "".join(answer_parts) if answer_parts else None,
            )
            if rendered:
                yield rendered
    except Exception as e:
        # UI boundary: report the failure in the chat instead of crashing it.
        yield f"Failed to get response: {e}"
|
|
def run_example(message: Dict[str, Any], history: List[Dict[str, Any]] | None = None):
    """Send a clicked example straight to the model and stream the chat state.

    - ``message`` is the same dict the ChatInterface uses:
      ``{"text": ..., "files": [...]}``.
    - ``history`` is the Chatbot's current message list (``type="messages"``).
    - Each yielded value is a full Chatbot message list
      (``[{role, content}, ...]``): the prior history, the example's text as
      the user turn, and the partial assistant reply.

    Only the text is shown for the user turn; ``"[Example]"`` stands in when
    the example has no text.
    """
    prior_turns = history or []
    # The displayed prompt does not change while the reply streams.
    shown_prompt = (message.get("text") or "").strip() or "[Example]"

    for partial_reply in stream_response(message, prior_turns):
        yield [
            *prior_turns,
            {"role": "user", "content": shown_prompt},
            {"role": "assistant", "content": partial_reply},
        ]
|
|
def build_demo() -> gr.Blocks:
    """Assemble the Gradio UI and return it with request queuing enabled.

    The page consists of a hero header, a multimodal textbox and chatbot that
    are created once and shared, an examples panel wired to ``run_example``,
    and a ``gr.ChatInterface`` driven by ``stream_response``.

    Returns:
        The ``gr.Blocks`` app after ``queue(default_concurrency_limit=8)``.
    """
    theme = gr.themes.Soft(primary_hue="violet", secondary_hue="cyan", neutral_hue="slate")


    with gr.Blocks(
        title="ERNIE-4.5-VL-28B-A3B-Thinking",
        theme=theme,
        css=CUSTOM_CSS,
    ) as demo:
        # Hero header: title plus a link to the model repository.
        with gr.Column(elem_id="ernie-hero"):
            gr.Markdown(
                """
                <h1>Chat with ERNIE-4.5-VL-28B-A3B-Thinking</h1>
                """,
                elem_id="hero-text",
            )
            gr.Markdown(
                """
                <p id="model-link">
                Model Repository:
                <a href="https://huggingface.co/baidu/ERNIE-4.5-VL-28B-A3B-Thinking" target="_blank" rel="noopener">
                ERNIE-4.5-VL-28B-A3B-Thinking
                </a>
                </p>
                """
            )


        # Input and chatbot are built here so the same components can be
        # handed to both gr.Examples and gr.ChatInterface below.
        # NOTE(review): the placeholder mentions only images although
        # file_types also allows video.
        textbox = gr.MultimodalTextbox(
            show_label=False,
            placeholder="Enter text, or upload one or more images...",
            file_types=["image","video"],
            file_count="multiple"
        )
        chatbot = gr.Chatbot(
            type="messages",
            allow_tags=["think"],
            height=560,
            render_markdown=True,
            show_copy_button=True,
        )

        # Example prompts; file paths are relative (examples/ directory).
        examples = [
            {
                "text": "这道题怎么解",
                "files": ["examples/case1.png"]
            },
            {
                "text": "How many real people are actually in the picture?",
                "files": ["examples/case2.png"]
            },
        ]


        # Examples fill the textbox and, with run_on_click=True, call
        # run_example with the chatbot as output. Only the textbox is an
        # input, so run_example receives no history argument.
        with gr.Column(elem_id="examples-panel"):
            gr.Examples(
                examples=examples,
                inputs=textbox,
                label=None,
                examples_per_page=4,
                elem_id="examples-grid",
                fn=run_example,
                outputs=chatbot,
                run_on_click=True,
            )


        # Main chat loop, reusing the textbox and chatbot created above.
        # NOTE(review): chat_interface is bound but not used afterwards.
        with gr.Column(elem_id="chat-wrapper"):
            chat_interface = gr.ChatInterface(
                fn=stream_response,
                type="messages",
                multimodal=True,
                chatbot=chatbot,
                textbox=textbox,
            )




    return demo.queue(default_concurrency_limit=8)
|
|
|
|
|
|
# Script entry point: build the UI and start the Gradio server.
if __name__ == "__main__":
    app = build_demo()
    app.launch()
|
|