import gradio as gr
import json
import os
import tempfile
from pathlib import Path
# NOTE: 'working_yolo_pipeline.py' must exist alongside this file and define
# run_document_pipeline, DEFAULT_LAYOUTLMV3_MODEL_PATH, and WEIGHTS_PATH.
# Fall back to placeholders so the UI can still start without the pipeline module.
try:
    from working_yolo_pipeline import run_document_pipeline, DEFAULT_LAYOUTLMV3_MODEL_PATH, WEIGHTS_PATH
except ImportError:
    print("Warning: 'working_yolo_pipeline.py' not found. Using dummy paths.")

    def run_document_pipeline(*args, **kwargs):
        return {"error": "Placeholder pipeline function called."}

    DEFAULT_LAYOUTLMV3_MODEL_PATH = "./models/layoutlmv3_model"
    WEIGHTS_PATH = "./weights/yolo_weights.pt"
def process_pdf(pdf_file, layoutlmv3_model_path=None):
    """
    Wrapper function for the Gradio interface.

    Args:
        pdf_file: Path to the uploaded PDF (gr.File with type="filepath" passes a string)
        layoutlmv3_model_path: Optional custom model path

    Returns:
        Tuple of (JSON string for display, path of the JSON file offered for download)
    """
    if pdf_file is None:
        return "❌ Error: No PDF file uploaded.", None

    # Use the default model path if none was provided
    if not layoutlmv3_model_path:
        layoutlmv3_model_path = DEFAULT_LAYOUTLMV3_MODEL_PATH

    # Verify that the model and weights exist
    if not os.path.exists(layoutlmv3_model_path):
        return f"❌ Error: LayoutLMv3 model not found at {layoutlmv3_model_path}", None
    if not os.path.exists(WEIGHTS_PATH):
        return f"❌ Error: YOLO weights not found at {WEIGHTS_PATH}", None

    try:
        # gr.File(type="filepath") hands over a plain path string; fall back to .name
        # for older Gradio versions that pass a tempfile wrapper object
        pdf_path = pdf_file if isinstance(pdf_file, str) else pdf_file.name

        # Run the pipeline
        result = run_document_pipeline(pdf_path, layoutlmv3_model_path, 'label_studio_import.json')
        if result is None:
            return "❌ Error: Pipeline failed to process the PDF. Check console for details.", None

        # Write the results to a temporary directory so Gradio can offer a download
        # with a meaningful filename
        output_filename = f"{Path(pdf_path).stem}_analysis.json"
        output_path = os.path.join(tempfile.mkdtemp(prefix='analysis_'), output_filename)
        with open(output_path, 'w', encoding='utf-8') as f:
            json.dump(result, f, indent=2, ensure_ascii=False)

        # Format JSON for display
        json_display = json.dumps(result, indent=2, ensure_ascii=False)

        return json_display, output_path

    except Exception as e:
        return f"❌ Error during processing: {str(e)}", None
# Create the Gradio interface
# Note: theme=gr.themes.Soft() was removed because it raised a TypeError on this Gradio version
with gr.Blocks(title="Document Analysis Pipeline") as demo:
    gr.Markdown("""
# 📄 Document Analysis Pipeline

Upload a PDF document to extract structured data including questions, options, answers, passages, and embedded images.

**Pipeline Steps:**

1. 🔍 YOLO/OCR Preprocessing (word extraction + figure/equation detection)
2. 🤖 LayoutLMv3 Inference (BIO tagging)
3. 📊 Structured JSON Decoding
4. 🖼️ Base64 Image Embedding
""")
    with gr.Row():
        with gr.Column(scale=1):
            pdf_input = gr.File(
                label="Upload PDF Document",
                file_types=[".pdf"],
                type="filepath"
            )
            model_path_input = gr.Textbox(
                label="LayoutLMv3 Model Path (optional)",
                placeholder=DEFAULT_LAYOUTLMV3_MODEL_PATH,
                value=DEFAULT_LAYOUTLMV3_MODEL_PATH,
                interactive=True
            )
            process_btn = gr.Button("🚀 Process Document", variant="primary", size="lg")
            gr.Markdown("""
### ℹ️ Notes:

- Processing may take several minutes depending on PDF size
- Figures and equations will be extracted and embedded as Base64
- The output JSON includes structured questions, options, and answers
""")

        with gr.Column(scale=2):
            json_output = gr.Code(
                label="Structured JSON Output",
                language="json",
                lines=25
            )
            download_output = gr.File(
                label="Download Full JSON",
                interactive=False
            )
    # Status/Examples section
    with gr.Row():
        gr.Markdown("""
### 📋 Output Format

The pipeline generates JSON with the following structure:

- **Questions**: Extracted question text
- **Options**: Multiple choice options (A, B, C, D, etc.)
- **Answers**: Correct answer(s)
- **Passages**: Associated reading passages
- **Images**: Base64-encoded figures and equations (embedded with keys like `figure1`, `equation2`)
""")
    # Connect the button to the processing function
    process_btn.click(
        fn=process_pdf,
        inputs=[pdf_input, model_path_input],
        outputs=[json_output, download_output],
        api_name="process_document"
    )
    # Example section (optional - add example PDFs if available)
    # gr.Examples(
    #     examples=[
    #         ["examples/sample1.pdf"],
    #         ["examples/sample2.pdf"],
    #     ],
    #     inputs=pdf_input,
    # )
# Launch the app
if __name__ == "__main__":
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False,
        show_error=True
    )
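
# Programmatic access (optional): the click handler above is exposed as a named API
# endpoint ("process_document"), so the running app can also be called from a script.
# A minimal sketch, assuming the app is up on localhost:7860, the gradio_client package
# is installed (handle_file requires a recent release), and "sample.pdf" is a stand-in
# for a real PDF path:
#
#     from gradio_client import Client, handle_file
#
#     client = Client("http://localhost:7860")
#     json_text, download_path = client.predict(
#         handle_file("sample.pdf"),
#         "./models/layoutlmv3_model",
#         api_name="/process_document",
#     )
#     print(download_path)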