# Source: Hugging Face Space heerjtdev/yolo_layoutlm (status: Sleeping; file size: 5,657 bytes)

import gradio as gr
import json
import os
import tempfile
from pathlib import Path

# The real pipeline lives in 'working_yolo_pipeline.py', which must define
# run_document_pipeline, DEFAULT_LAYOUTLMV3_MODEL_PATH and WEIGHTS_PATH.
#
# The import is attempted exactly once, inside the try block: an additional
# unconditional import ahead of it would raise ImportError first and make the
# placeholder fallback below unreachable.
try:
    from working_yolo_pipeline import (
        DEFAULT_LAYOUTLMV3_MODEL_PATH,
        WEIGHTS_PATH,
        run_document_pipeline,
    )
except ImportError:
    print("Warning: 'working_yolo_pipeline.py' not found. Using dummy paths.")

    def run_document_pipeline(*args, **kwargs):
        """Placeholder used when the real pipeline module cannot be imported.

        Accepts any arguments and always returns a dict carrying an error
        message instead of processing anything.
        """
        return {"error": "Placeholder pipeline function called."}

    # Dummy locations; replace with the actual definitions if they are missing.
    DEFAULT_LAYOUTLMV3_MODEL_PATH = "./models/layoutlmv3_model"
    WEIGHTS_PATH = "./weights/yolo_weights.pt"


def process_pdf(pdf_file, layoutlmv3_model_path=None):
    """
    Run the document pipeline on an uploaded PDF for the Gradio interface.

    Args:
        pdf_file: The uploaded PDF, either as a filesystem path (a ``str`` or
            ``os.PathLike``, which is what ``gr.File(type="filepath")``
            passes) or as a file-like object exposing its path via ``.name``.
        layoutlmv3_model_path: Optional custom model path. ``None``, an empty
            string or a whitespace-only string falls back to
            DEFAULT_LAYOUTLMV3_MODEL_PATH.

    Returns:
        Tuple of (text, download file path). On success, text is the
        pretty-printed JSON result and the path points at a temporary
        ``.json`` file holding the same content. On any failure, text is an
        error message and the path is None.
    """
    if pdf_file is None:
        return "❌ Error: No PDF file uploaded.", None

    # A text box can submit stray whitespace; treat that as "not provided".
    if isinstance(layoutlmv3_model_path, str):
        layoutlmv3_model_path = layoutlmv3_model_path.strip()

    # Use default model path if not provided
    if not layoutlmv3_model_path:
        layoutlmv3_model_path = DEFAULT_LAYOUTLMV3_MODEL_PATH

    # Verify model and weights exist
    if not os.path.exists(layoutlmv3_model_path):
        return f"❌ Error: LayoutLMv3 model not found at {layoutlmv3_model_path}", None

    if not os.path.exists(WEIGHTS_PATH):
        return f"❌ Error: YOLO weights not found at {WEIGHTS_PATH}", None

    try:
        # The upload arrives as a plain path when the File component uses
        # type="filepath"; only file-like objects carry the path in `.name`.
        if isinstance(pdf_file, (str, os.PathLike)):
            pdf_path = os.fspath(pdf_file)
        else:
            pdf_path = pdf_file.name

        # Run the pipeline
        result = run_document_pipeline(pdf_path, layoutlmv3_model_path, 'label_studio_import.json')

        if result is None:
            return "❌ Error: Pipeline failed to process the PDF. Check console for details.", None

        # Serialize once; the same text is displayed and offered for download.
        json_display = json.dumps(result, indent=2, ensure_ascii=False)

        # delete=False keeps the file on disk after the handle is closed so
        # it can still be served as the download; the `with` block guarantees
        # the handle itself is not leaked.
        with tempfile.NamedTemporaryFile(
            mode='w',
            delete=False,
            suffix='.json',
            prefix='analysis_',
            encoding='utf-8',
        ) as temp_output:
            temp_output.write(json_display)

        return json_display, temp_output.name

    except Exception as e:
        # UI boundary: report the failure in the output panel instead of raising.
        return f"❌ Error during processing: {str(e)}", None


# Build the Gradio UI: an input column (PDF upload, model path, run button),
# an output column (JSON viewer, download link) and a format description row.
# NOTE: 'theme=gr.themes.Soft()' is intentionally not passed; it caused a TypeError.
demo = gr.Blocks(title="Document Analysis Pipeline")
with demo:
    # Page header and pipeline overview.
    gr.Markdown("""
    # 📄 Document Analysis Pipeline

    Upload a PDF document to extract structured data including questions, options, answers, passages, and embedded images.

    **Pipeline Steps:**
    1. 🔍 YOLO/OCR Preprocessing (word extraction + figure/equation detection)
    2. 🤖 LayoutLMv3 Inference (BIO tagging)
    3. 📊 Structured JSON Decoding
    4. 🖼️ Base64 Image Embedding
    """)

    with gr.Row():
        # Left column: inputs and the trigger button.
        with gr.Column(scale=1):
            pdf_input = gr.File(type="filepath", file_types=[".pdf"], label="Upload PDF Document")

            # Pre-filled with the default model path; the user may override it.
            model_path_input = gr.Textbox(
                value=DEFAULT_LAYOUTLMV3_MODEL_PATH,
                placeholder=DEFAULT_LAYOUTLMV3_MODEL_PATH,
                label="LayoutLMv3 Model Path (optional)",
                interactive=True,
            )

            process_btn = gr.Button("🚀 Process Document", size="lg", variant="primary")

            gr.Markdown("""
            ### ℹ️ Notes:
            - Processing may take several minutes depending on PDF size
            - Figures and equations will be extracted and embedded as Base64
            - The output JSON includes structured questions, options, and answers
            """)

        # Right column: results.
        with gr.Column(scale=2):
            json_output = gr.Code(language="json", lines=25, label="Structured JSON Output")

            download_output = gr.File(interactive=False, label="Download Full JSON")

    # Description of the generated JSON.
    with gr.Row():
        gr.Markdown("""
        ### 📋 Output Format
        The pipeline generates JSON with the following structure:
        - **Questions**: Extracted question text
        - **Options**: Multiple choice options (A, B, C, D, etc.)
        - **Answers**: Correct answer(s)
        - **Passages**: Associated reading passages
        - **Images**: Base64-encoded figures and equations (embedded with keys like `figure1`, `equation2`)
        """)

    # Wire the button: both inputs go to process_pdf, whose two return values
    # fill the JSON viewer and the download component.
    click_inputs = [pdf_input, model_path_input]
    click_outputs = [json_output, download_output]
    process_btn.click(
        fn=process_pdf,
        inputs=click_inputs,
        outputs=click_outputs,
        api_name="process_document",
    )

    # Example section (optional - add example PDFs if available)
    # gr.Examples(
    #     examples=[
    #         ["examples/sample1.pdf"],
    #         ["examples/sample2.pdf"],
    #     ],
    #     inputs=pdf_input,
    # )

# Start the app only when this file is executed directly (not on import).
if __name__ == "__main__":
    # Server settings passed to demo.launch.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": False,
        "show_error": True,
    }
    demo.launch(**launch_options)