aiexplorations
/

vidai-demo

Model card Files Files and versions

xet

Community

aiexplorations committed on Jan 13

Commit

1841103

verified ·

1 Parent(s): f891147

Upload app.py with huggingface_hub

Browse files

Files changed (1) hide show

app.py +503 -0

app.py ADDED Viewed

	@@ -0,0 +1,503 @@

+#!/usr/bin/env python3
+"""
+Vidai HuggingFace Spaces Demo
+Self-contained demo for parsing mathematical expressions to prefix notation.
+Includes all necessary model code to run standalone on HuggingFace Spaces.
+"""
+from dataclasses import dataclass
+from typing import Optional
+import gradio as gr
+import sympy
+import torch
+import torch.nn as nn
+from huggingface_hub import hf_hub_download
+# =============================================================================
+# Model Configuration
+# =============================================================================
+@dataclass
+class TreeComputeConfig:
+    """Hyperparameters for the Tree Compute Transformer.
+
+    Raises:
+        ValueError: if ``n_heads`` is not positive or does not divide ``d_model``.
+    """
+    # Width of every embedding and of each Transformer layer.
+    d_model: int = 256
+    # Number of layers in the context encoder.
+    n_context_layers: int = 4
+    # Attention heads per Transformer layer; must divide d_model evenly.
+    n_heads: int = 8
+    # Hidden size of the feed-forward sub-layer in each Transformer layer.
+    d_ff: int = 1024
+    dropout: float = 0.1
+    # NOTE(review): expert_*, max_nodes, the *_token_id fields and value_clamp_*
+    # are not read by the model code in this file; presumably they are kept so
+    # that config dicts stored in checkpoints still construct - confirm.
+    expert_hidden_dim: int = 128
+    expert_layers: int = 2
+    # Size of the encoder's learned position table.
+    max_seq_len: int = 512
+    # Size of the encoder's depth-embedding table; depths are clamped below it.
+    max_depth: int = 32
+    max_nodes: int = 64
+    # Size of the encoder's token-embedding table.
+    vocab_size: int = 35
+    add_token_id: int = 16
+    sub_token_id: int = 15
+    mul_token_id: int = 18
+    div_token_id: int = 19
+    pow_token_id: int = 25
+    mod_token_id: int = 26
+    sqrt_token_id: int = 27
+    abs_token_id: int = 28
+    floor_token_id: int = 29
+    ceil_token_id: int = 30
+    value_clamp_min: float = -1e6
+    value_clamp_max: float = 1e6
+    # Number of layers in the parser decoder.
+    n_decoder_layers: int = 4
+    # Size of the decoder's token embedding and of its output projection.
+    parser_vocab_size: int = 128
+    # Size of the decoder's position table and default generation length.
+    max_output_len: int = 256
+    # Special token ids of the parser (output) vocabulary.
+    parser_pad_id: int = 0
+    parser_bos_id: int = 1
+    parser_eos_id: int = 2
+    def __post_init__(self) -> None:
+        # Raise explicitly instead of using `assert`: assertions are stripped
+        # under `python -O`, and a zero head count would otherwise surface as
+        # an unrelated ZeroDivisionError.
+        if self.n_heads <= 0 or self.d_model % self.n_heads != 0:
+            raise ValueError(
+                f"d_model ({self.d_model}) must be divisible by a positive "
+                f"n_heads ({self.n_heads})"
+            )
+# =============================================================================
+# Model Components
+# =============================================================================
+class ContextEncoder(nn.Module):
+    """Embed input token ids and contextualise them with a Transformer encoder.
+
+    Each position is represented as the sum of a token embedding, a learned
+    absolute-position embedding and a tree-depth embedding.
+    """
+    def __init__(self, config: TreeComputeConfig):
+        super().__init__()
+        self.config = config
+        width = config.d_model
+        self.token_embedding = nn.Embedding(config.vocab_size, width)
+        self.position_embedding = nn.Embedding(config.max_seq_len, width)
+        self.depth_embedding = nn.Embedding(config.max_depth, width)
+        self.transformer = nn.TransformerEncoder(
+            nn.TransformerEncoderLayer(
+                d_model=width,
+                nhead=config.n_heads,
+                dim_feedforward=config.d_ff,
+                dropout=config.dropout,
+                activation='gelu',
+                batch_first=True,
+            ),
+            num_layers=config.n_context_layers,
+        )
+        self.layer_norm = nn.LayerNorm(width)
+        self.dropout = nn.Dropout(config.dropout)
+    def forward(self, input_ids, tree_depths, attention_mask=None):
+        """Encode a batch of token ids.
+
+        Args:
+            input_ids: 2-D integer tensor of token ids (rows are sequences).
+            tree_depths: integer depth per token, added as an embedding after
+                clamping to ``[0, max_depth - 1]``; presumably the same shape
+                as ``input_ids`` - confirm against callers.
+            attention_mask: optional boolean tensor, True at real tokens; it
+                is inverted to form the encoder's key-padding mask.
+
+        Returns:
+            The layer-normalised encoder output.
+        """
+        n_rows, n_cols = input_ids.shape
+        column_index = torch.arange(n_cols, device=input_ids.device).expand(n_rows, -1)
+        deepest = self.config.max_depth - 1
+        embedded = (
+            self.token_embedding(input_ids)
+            + self.position_embedding(column_index)
+            + self.depth_embedding(tree_depths.clamp(0, deepest))
+        )
+        embedded = self.dropout(embedded)
+        # The encoder expects True where a key must be ignored, i.e. the
+        # complement of the caller's "real token" mask.
+        if attention_mask is None:
+            padding_mask = None
+        else:
+            padding_mask = ~attention_mask
+        encoded = self.transformer(embedded, src_key_padding_mask=padding_mask)
+        return self.layer_norm(encoded)
+class SymbolicParserDecoder(nn.Module):
+    """Autoregressive Transformer decoder that emits parser-vocabulary token ids.
+
+    ``forward`` scores a full target sequence in one pass; ``generate`` decodes
+    greedily one token at a time starting from the BOS token.
+    """
+    def __init__(self, config: "TreeComputeConfig"):
+        super().__init__()
+        self.config = config
+        self.token_embedding = nn.Embedding(config.parser_vocab_size, config.d_model)
+        self.position_embedding = nn.Embedding(config.max_output_len, config.d_model)
+        decoder_layer = nn.TransformerDecoderLayer(
+            d_model=config.d_model,
+            nhead=config.n_heads,
+            dim_feedforward=config.d_ff,
+            dropout=config.dropout,
+            activation='gelu',
+            batch_first=True,
+        )
+        self.transformer_decoder = nn.TransformerDecoder(decoder_layer, num_layers=config.n_decoder_layers)
+        self.output_projection = nn.Linear(config.d_model, config.parser_vocab_size)
+        self.layer_norm = nn.LayerNorm(config.d_model)
+        self.dropout = nn.Dropout(config.dropout)
+    def forward(self, target_ids, encoder_memory, target_mask=None, memory_mask=None):
+        """Score every position of ``target_ids`` under a causal mask.
+
+        Args:
+            target_ids: 2-D integer tensor of parser token ids (batch first).
+            encoder_memory: encoder output attended to by the decoder.
+            target_mask: optional boolean tensor, True at real target tokens.
+            memory_mask: optional boolean tensor, True at real encoder tokens.
+
+        Returns:
+            Logits over the parser vocabulary for every target position.
+        """
+        batch_size, tgt_len = target_ids.shape
+        device = target_ids.device
+        x = self.token_embedding(target_ids)
+        positions = torch.arange(tgt_len, device=device).unsqueeze(0).expand(batch_size, -1)
+        x = x + self.position_embedding(positions)
+        x = self.dropout(x)
+        causal_mask = nn.Transformer.generate_square_subsequent_mask(tgt_len, device=device)
+        # The decoder expects True where a position must be ignored, so the
+        # caller's "real token" masks are inverted.
+        tgt_key_padding_mask = ~target_mask if target_mask is not None else None
+        memory_key_padding_mask = ~memory_mask if memory_mask is not None else None
+        x = self.transformer_decoder(
+            tgt=x, memory=encoder_memory, tgt_mask=causal_mask,
+            tgt_key_padding_mask=tgt_key_padding_mask,
+            memory_key_padding_mask=memory_key_padding_mask,
+        )
+        x = self.layer_norm(x)
+        return self.output_projection(x)
+    @torch.no_grad()
+    def generate(self, encoder_memory, memory_mask=None, max_len=None, temperature=1.0):
+        """Greedily decode token ids for each row of ``encoder_memory``.
+
+        Args:
+            encoder_memory: encoder output; its first dimension is the batch.
+            memory_mask: optional boolean tensor, True at real encoder tokens.
+            max_len: maximum output length including BOS. Defaults to
+                ``config.max_output_len`` and is capped at the size of the
+                position table.
+            temperature: accepted for interface compatibility; decoding is
+                always greedy (argmax), so the value is not used.
+
+        Returns:
+            Integer tensor of generated ids, one row per batch element. Each
+            row starts with BOS; after a row's EOS only the pad id follows.
+        """
+        if max_len is None:
+            max_len = self.config.max_output_len
+        # Positions past the learned table would make the embedding lookup
+        # fail with an index error, so never decode further than it covers.
+        max_len = min(max_len, self.position_embedding.num_embeddings)
+        batch_size = encoder_memory.shape[0]
+        device = encoder_memory.device
+        eos_id = self.config.parser_eos_id
+        pad_id = self.config.parser_pad_id
+        output_ids = torch.full((batch_size, 1), self.config.parser_bos_id, dtype=torch.long, device=device)
+        # Tracks which rows have already produced EOS.
+        finished = torch.zeros((batch_size, 1), dtype=torch.bool, device=device)
+        memory_key_padding_mask = ~memory_mask if memory_mask is not None else None
+        for _ in range(max_len - 1):
+            tgt_len = output_ids.shape[1]
+            # Unlike forward(), no dropout is applied on this path.
+            x = self.token_embedding(output_ids)
+            positions = torch.arange(tgt_len, device=device).unsqueeze(0).expand(batch_size, -1)
+            x = x + self.position_embedding(positions)
+            causal_mask = nn.Transformer.generate_square_subsequent_mask(tgt_len, device=device)
+            x = self.transformer_decoder(tgt=x, memory=encoder_memory, tgt_mask=causal_mask,
+                                         memory_key_padding_mask=memory_key_padding_mask)
+            x = self.layer_norm(x)
+            logits = self.output_projection(x[:, -1, :])
+            next_token = logits.argmax(dim=-1, keepdim=True)
+            # Rows that already ended receive padding instead of more tokens.
+            next_token = next_token.masked_fill(finished, pad_id)
+            output_ids = torch.cat([output_ids, next_token], dim=1)
+            # Stop once every row has ended, even if they ended on different
+            # steps (the previous check required EOS on the same step).
+            finished = finished | (next_token == eos_id)
+            if finished.all():
+                break
+        return output_ids
+class TreeComputeTransformer(nn.Module):
+    """Vidai parsing model: a context encoder feeding a prefix-notation decoder."""
+    def __init__(self, config: TreeComputeConfig):
+        super().__init__()
+        self.config = config
+        self.context_encoder = ContextEncoder(config)
+        self.parser_decoder = SymbolicParserDecoder(config)
+    @torch.no_grad()
+    def parse(self, input_ids, input_mask=None, max_len=256, temperature=1.0, beam_size=1):
+        """Encode ``input_ids`` and decode the corresponding parser token ids.
+
+        Args:
+            input_ids: 2-D integer tensor of input token ids.
+            input_mask: boolean tensor, True at real tokens. When omitted,
+                every non-zero id is treated as a real token.
+            max_len: maximum number of generated ids, forwarded to the decoder.
+            temperature: forwarded to the decoder's ``generate``.
+            beam_size: accepted but not used by this method.
+
+        Returns:
+            Whatever the decoder's ``generate`` returns for the batch.
+        """
+        mask = (input_ids != 0) if input_mask is None else input_mask
+        # No tree structure is available for raw text, so every token is
+        # given depth zero.
+        flat_depths = torch.zeros_like(input_ids)
+        memory = self.context_encoder(input_ids, flat_depths, mask)
+        return self.parser_decoder.generate(
+            encoder_memory=memory,
+            memory_mask=mask,
+            max_len=max_len,
+            temperature=temperature,
+        )
+# =============================================================================
+# Tokenizer
+# =============================================================================
+class ParserTokenizer:
+    """Character-level input encoder and token-level output decoder.
+
+    Inputs are encoded as code points below 256; outputs are decoded through
+    a fixed vocabulary of operators, functions, names, digits and punctuation.
+    """
+    PAD_TOKEN = "<pad>"
+    BOS_TOKEN = "<bos>"
+    EOS_TOKEN = "<eos>"
+    UNK_TOKEN = "<unk>"
+    def __init__(self):
+        self.input_vocab_size = 256
+        self.output_vocab = self._build_output_vocab()
+        self.output_token_to_id = {}
+        self.output_id_to_token = {}
+        # A token listed twice keeps its last index in the token->id map,
+        # while both indices decode back to it.
+        for index, token in enumerate(self.output_vocab):
+            self.output_token_to_id[token] = index
+            self.output_id_to_token[index] = token
+    def _build_output_vocab(self):
+        """Return the output vocabulary as a list whose index is the token id."""
+        specials = [self.PAD_TOKEN, self.BOS_TOKEN, self.EOS_TOKEN, self.UNK_TOKEN]
+        operators = ["+", "-", "*", "/", "**", "%"]
+        functions = ["sqrt", "abs", "floor", "ceil", "sin", "cos", "tan", "log", "exp"]
+        latin_names = list("xyzabcdnmtrvgk")
+        greek_names = ["alpha", "beta", "gamma", "delta", "theta", "phi", "psi", "omega", "lambda", "mu", "sigma", "tau"]
+        constants = ["pi", "e", "i"]
+        numerals = list("0123456789.")
+        # "/" is listed a second time here; the order and the duplicate are
+        # kept as-is because ids presumably must match the trained
+        # checkpoint - confirm before changing.
+        punctuation = ["(", ")", " ", ",", "/"]
+        return [
+            *specials, *operators, *functions, *latin_names,
+            *greek_names, *constants, *numerals, *punctuation,
+        ]
+    @property
+    def output_vocab_size(self):
+        """Number of entries in the output vocabulary."""
+        return len(self.output_vocab)
+    @property
+    def pad_id(self):
+        """Id of the padding token."""
+        return self.output_token_to_id[self.PAD_TOKEN]
+    @property
+    def bos_id(self):
+        """Id of the beginning-of-sequence token."""
+        return self.output_token_to_id[self.BOS_TOKEN]
+    @property
+    def eos_id(self):
+        """Id of the end-of-sequence token."""
+        return self.output_token_to_id[self.EOS_TOKEN]
+    def encode_input(self, text: str, max_len: int = 256) -> list:
+        """Encode ``text`` as a list of exactly ``max_len`` code points.
+
+        Characters with a code point of 256 or more become ``'?'``; the text
+        is truncated to ``max_len`` and right-padded with zeros.
+        """
+        fallback = ord('?')
+        codes = []
+        for ch in text[:max_len]:
+            code = ord(ch)
+            codes.append(code if code < 256 else fallback)
+        codes.extend([0] * (max_len - len(codes)))
+        return codes
+    def decode_output(self, ids: list, skip_special: bool = True) -> str:
+        """Concatenate the tokens for ``ids``, stopping at the first EOS.
+
+        With ``skip_special`` the pad and BOS tokens are omitted. Ids at or
+        beyond the vocabulary size are dropped silently.
+        """
+        eos = self.eos_id
+        ignorable = {self.pad_id, self.bos_id, eos} if skip_special else set()
+        vocab_size = len(self.output_id_to_token)
+        pieces = []
+        for token_id in ids:
+            if token_id == eos:
+                break
+            if token_id in ignorable:
+                continue
+            if token_id < vocab_size:
+                pieces.append(self.output_id_to_token[token_id])
+        return "".join(pieces)
+# =============================================================================
+# Prefix Notation to SymPy
+# =============================================================================
+# Binary operators accepted in prefix notation, mapped to the Python
+# operation applied to the two parsed operands.
+PREFIX_OPS = {
+    '+': lambda a, b: a + b,
+    '-': lambda a, b: a - b,
+    '*': lambda a, b: a * b,
+    '/': lambda a, b: a / b,
+    '**': lambda a, b: a ** b,
+    '^': lambda a, b: a ** b,
+    # "%" is part of the parser's output vocabulary; without an entry here it
+    # would be read as a symbol named "%" and its operands rejected as
+    # unexpected tokens.
+    '%': lambda a, b: a % b,
+}
+# Single-argument functions accepted in prefix notation.
+PREFIX_UNARY = {
+    'sqrt': sympy.sqrt,
+    'abs': sympy.Abs,
+    'floor': sympy.floor,
+    'ceil': sympy.ceiling,
+    'sin': sympy.sin,
+    'cos': sympy.cos,
+    'tan': sympy.tan,
+    'exp': sympy.exp,
+    'log': sympy.log,
+}
+# Names that resolve to exact SymPy constants rather than free symbols.
+PREFIX_CONSTANTS = {
+    'pi': sympy.pi,
+    'e': sympy.E,
+}
+def prefix_to_sympy(prefix_str: str):
+    """Build a SymPy expression from whitespace-separated prefix notation.
+
+    Raises:
+        ValueError: if the input holds no tokens, or if tokens remain after
+            one complete expression has been read.
+    """
+    # str.split() without arguments already ignores surrounding whitespace.
+    token_list = prefix_str.split()
+    if len(token_list) == 0:
+        raise ValueError("Empty prefix notation")
+    expression, leftover = _parse_prefix_tokens(token_list)
+    if not leftover:
+        return expression
+    raise ValueError(f"Unexpected tokens: {leftover}")
+def _parse_prefix_tokens(tokens):
+    """Read one complete prefix sub-expression from the front of ``tokens``.
+
+    Returns:
+        A pair ``(expression, remaining_tokens)``.
+
+    Raises:
+        ValueError: if ``tokens`` is empty where an operand is required.
+    """
+    if len(tokens) == 0:
+        raise ValueError("Unexpected end of tokens")
+    head, tail = tokens[0], tokens[1:]
+    binary_op = PREFIX_OPS.get(head)
+    if binary_op is not None:
+        lhs, tail = _parse_prefix_tokens(tail)
+        rhs, tail = _parse_prefix_tokens(tail)
+        return binary_op(lhs, rhs), tail
+    unary_fn = PREFIX_UNARY.get(head)
+    if unary_fn is not None:
+        argument, tail = _parse_prefix_tokens(tail)
+        return unary_fn(argument), tail
+    if head in PREFIX_CONSTANTS:
+        return PREFIX_CONSTANTS[head], tail
+    # Anything that does not parse as a number is treated as a symbol name.
+    number_type = sympy.Float if '.' in head else sympy.Integer
+    try:
+        number = number_type(head)
+    except (ValueError, TypeError):
+        number = None
+    if number is not None:
+        return number, tail
+    return sympy.Symbol(head), tail
+# =============================================================================
+# Global State
+# =============================================================================
+# Lazily populated by load_model(); all three stay None until a load succeeds.
+MODEL = None
+TOKENIZER = None
+DEVICE = None
+def load_model():
+    """Download the checkpoint once and return ``(model, tokenizer)``.
+
+    The first successful call stores the model, tokenizer and device name in
+    the module globals; later calls return the cached pair. If any step
+    fails, the exception propagates and the globals are left untouched, so
+    the next call retries from scratch.
+    """
+    global MODEL, TOKENIZER, DEVICE
+    if MODEL is not None and TOKENIZER is not None:
+        return MODEL, TOKENIZER
+    device = "cuda" if torch.cuda.is_available() else "cpu"
+    checkpoint_path = hf_hub_download(
+        repo_id="aiexplorations/vidai",
+        filename="finetune_v1_step3500.pt",
+    )
+    # NOTE(security): weights_only=False unpickles arbitrary Python objects
+    # from the downloaded file. It is kept because the checkpoint stores its
+    # config next to the weights; only load checkpoints from a trusted repo.
+    ckpt = torch.load(checkpoint_path, map_location=device, weights_only=False)
+    config = TreeComputeConfig(**ckpt['config']['model_config'])
+    model = TreeComputeTransformer(config)
+    model.load_state_dict(ckpt['model_state_dict'])
+    model.eval()
+    model.to(device)
+    tokenizer = ParserTokenizer()
+    # Publish only after every step succeeded. Previously MODEL was assigned
+    # before the weights and tokenizer were ready, so a failure left a
+    # half-initialised model cached alongside a None tokenizer.
+    DEVICE = device
+    TOKENIZER = tokenizer
+    MODEL = model
+    return MODEL, TOKENIZER
+# One "name = number" assignment. The number may carry a sign, a decimal
+# point and an exponent; the trailing lookahead rejects malformed values
+# such as "1.2.3" instead of silently accepting a prefix of them.
+_SUBSTITUTION_PATTERN = (
+    r'([a-zA-Z_][a-zA-Z0-9_]*)\s*=\s*'
+    r'([+-]?(?:\d+\.?\d*|\.\d+)(?:[eE][+-]?\d+)?)(?![\w.])'
+)
+def _parse_substitutions(substitutions):
+    """Return ``{name: float}`` for text such as "x=1, y=2" or "x = 1 y = 2"."""
+    import re
+    if not substitutions or not substitutions.strip():
+        return {}
+    return {
+        name: float(value)
+        for name, value in re.findall(_SUBSTITUTION_PATTERN, substitutions)
+    }
+def _evaluate_prefix(prefix, substitutions):
+    """Evaluate prefix notation with SymPy and format the result as text."""
+    sympy_expr = prefix_to_sympy(prefix)
+    subs = {
+        sympy.Symbol(name): value
+        for name, value in _parse_substitutions(substitutions).items()
+    }
+    result = sympy_expr.subs(subs) if subs else sympy_expr
+    # Symbols without a value cannot be turned into a float; show the
+    # (partially substituted) expression instead of a conversion error.
+    if result.free_symbols:
+        return f"Symbolic: {result}"
+    return str(float(result))
+def parse_expression(expression: str, evaluate: bool = False, substitutions: str = ""):
+    """Parse a mathematical expression to prefix notation.
+
+    Args:
+        expression: the text to parse; only its first 128 characters are
+            passed to the model.
+        evaluate: when true, also evaluate the parsed expression with SymPy.
+        substitutions: optional variable values such as "x=3, y=4".
+
+    Returns:
+        A tuple ``(prefix, evaluation, status)`` of strings. Failures are
+        reported through ``status`` (or through ``evaluation`` when only the
+        evaluation step failed) rather than raised, because the values are
+        shown directly in the UI.
+    """
+    if not expression or not expression.strip():
+        return "", "", "Please enter an expression"
+    try:
+        model, tokenizer = load_model()
+    except Exception as e:
+        return "", "", f"Model loading error: {str(e)}"
+    try:
+        encoded = tokenizer.encode_input(expression, max_len=128)
+        input_ids = torch.tensor([encoded], device=DEVICE)
+        input_mask = input_ids != 0
+        with torch.no_grad():
+            output_ids = model.parse(input_ids, input_mask, max_len=64)
+        prefix = tokenizer.decode_output(output_ids[0].tolist())
+    except Exception as e:
+        return "", "", f"Error: {str(e)}"
+    eval_result = ""
+    if evaluate and prefix:
+        try:
+            eval_result = _evaluate_prefix(prefix, substitutions)
+        except Exception as e:
+            # A parse that cannot be evaluated is still worth showing.
+            eval_result = f"Evaluation error: {str(e)}"
+    return prefix, eval_result, "Success"
+# =============================================================================
+# Gradio Interface
+# =============================================================================
+# Rows offered by the gr.Examples widget. Each row is
+# [expression, evaluate flag, substitutions text], in the same order as the
+# widget's `inputs` components.
+EXAMPLES = [
+    # These work reliably
+    ["3 + 5 * 2", True, ""],
+    ["(x^2) + (3*y)", False, ""],
+    ["(x^2) + y", True, "x=3, y=4"],
+    ["sin(pi/2)", True, ""],
+    ["sqrt(16)", True, ""],
+    ["(a + b) * (a - b)", True, "a=5, b=3"],
+    ["(2*x) + (3*y) - z", True, "x=1, y=2, z=3"],
+]
+# Page layout: an introduction, an input column and an output column side by
+# side, the example rows, reference notes, and the button wiring.
+with gr.Blocks(title="Vidai - Neural Math Parser", theme=gr.themes.Soft()) as demo:
+    gr.Markdown("""
+    # Vidai: Neural Mathematical Parsing
+    > **Work in Progress**: Simple expressions work well; complex expressions need more training data.
+    > [Read the full story](https://rajeshrs.in/blog/ai-explorations/posts/2026-01-04-vidai-teaching-machines-arithmetic/)
+    Vidai (Tamil for "answer") uses transformers for what they're good at: recognizing the tree structure
+    in mathematical expressions. Instead of learning arithmetic from text, it learns to parse notation
+    into trees, then SymPy computes exact results.
+    - **Input**: Mathematical expression (e.g., `(x^2) + (3*y)`)
+    - **Output**: Prefix notation tree (e.g., `+ ** x 2 * 3 y` where `+` is the root)
+    - **Tip**: Use parentheses for reliable results
+    """)
+    with gr.Row():
+        # Left column: the three inputs passed to parse_expression, in order.
+        with gr.Column(scale=2):
+            expression_input = gr.Textbox(
+                label="Mathematical Expression",
+                placeholder="Enter an expression like: x^2 + 3*y",
+                lines=1,
+            )
+            with gr.Row():
+                evaluate_checkbox = gr.Checkbox(label="Evaluate", value=False)
+                substitutions_input = gr.Textbox(
+                    label="Variable Substitutions (optional)",
+                    placeholder="x=3, y=4",
+                    lines=1,
+                )
+            parse_button = gr.Button("Parse", variant="primary")
+        # Right column: read-only boxes for the three values that
+        # parse_expression returns (prefix, evaluation, status).
+        with gr.Column(scale=2):
+            prefix_output = gr.Textbox(label="Prefix Notation", interactive=False)
+            eval_output = gr.Textbox(label="Evaluation Result", interactive=False)
+            status_output = gr.Textbox(label="Status", interactive=False)
+    gr.Markdown("### Examples")
+    # cache_examples=False: example outputs are not computed ahead of time.
+    gr.Examples(
+        examples=EXAMPLES,
+        inputs=[expression_input, evaluate_checkbox, substitutions_input],
+        outputs=[prefix_output, eval_output, status_output],
+        fn=parse_expression,
+        cache_examples=False,
+    )
+    gr.Markdown("""
+    ---
+    ### How It Works
+    1. **Character-level encoding**: Input is encoded as ASCII characters
+    2. **Transformer parsing**: Encoder-decoder model (44.6M params) converts to prefix notation
+    3. **SymPy evaluation**: Deterministic symbolic computation (0 learned parameters)
+    **Supported operations**: +, -, *, /, ^ (power), sqrt, sin, cos, tan, log, exp, abs
+    **Variables**: x, y, z, a, b, c, d, n, m, t, r, pi, e, and Greek letters
+    ---
+    ### Known Limitations
+    | Expression Type | Accuracy | Recommendation |
+    |-----------------|----------|----------------|
+    | Parenthesized expressions | **100%** | Always works |
+    | Simple expressions (2 terms) | ~95% | Usually works |
+    | Complex without parens (3+ terms) | ~86% | Add parentheses |
+    | Functions + operators | ~86% | Wrap functions: `(sqrt(x)) + y` |
+    **For reliable results**: `(sqrt(16)) + (2^3)` instead of `sqrt(16) + 2^3`
+    [GitHub](https://github.com/aiexplorations/vidai) | [Model Card](https://huggingface.co/aiexplorations/vidai)
+    """)
+    # Clicking "Parse" runs parse_expression on the three inputs and writes
+    # its three return values to the output boxes.
+    parse_button.click(
+        fn=parse_expression,
+        inputs=[expression_input, evaluate_checkbox, substitutions_input],
+        outputs=[prefix_output, eval_output, status_output],
+    )
+# Start the Gradio server only when the file is run as a script.
+if __name__ == "__main__":
+    demo.launch()