File size: 2,220 Bytes
a321b61
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
#!/bin/bash
# Deploy memory optimizations via SSH to HuggingFace Space
# Run this after adding SSH key to HuggingFace settings
#
# Usage:
#   <this-script> user@host
#   SSH_TARGET=user@host <this-script>
#
# What it does, in order:
#   1. checks that the SSH connection works
#   2. backs up chronos_inference.py on the remote host
#   3. inserts model.eval() and a torch.inference_mode() wrapper with sed
#   4. verifies the edits (markers present, file still parses as Python)
#   5. restarts the Gradio app
#
# The sed edits are addressed by line number, so they are only applied when
# the inserted statements are not already in the file; re-running the script
# therefore neither duplicates the edits nor overwrites the backup with an
# already-patched file.

set -euo pipefail

readonly REMOTE_APP_DIR="/home/user/app"
readonly REMOTE_FILE="${REMOTE_APP_DIR}/src/forecasting/chronos_inference.py"
readonly REMOTE_BACKUP="${REMOTE_FILE}.backup"

# NOTE(review): this copy of the script carried the destination as the two
# words "[email protected]", which ssh cannot use as a host (it looks like a
# redacted address - confirm the real one). The destination is therefore taken
# from the first argument or from the SSH_TARGET environment variable.
readonly SSH_TARGET="${1:-${SSH_TARGET:-}}"

# Print a message to stderr and abort.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Run one command line ($1) on the remote host.
# -n keeps ssh from reading this script's stdin; none of the remote commands
# need input, and it lets the backgrounded restart detach cleanly.
remote() {
  ssh -n -o ConnectTimeout=10 "${SSH_TARGET}" "$1"
}

# Copy the backup over the edited file.
restore_backup() {
  remote "cp '${REMOTE_BACKUP}' '${REMOTE_FILE}'"
}

# Apply the line-number based edits. Each sed runs against the result of the
# previous one, so the order of these commands must not change.
# Returns non-zero as soon as one edit fails.
apply_patch() {
  # Add model.eval() after line 72
  remote "sed -i '72a\\        # Set model to evaluation mode (disables dropout, etc.)' '${REMOTE_FILE}'" || return 1
  remote "sed -i '73a\\        self._pipeline.model.eval()' '${REMOTE_FILE}'" || return 1

  # Add torch.inference_mode() wrapper around predict_df()
  remote "sed -i '188i\\                # Use torch.inference_mode() to disable gradient tracking (saves ~2-5 GB VRAM)' '${REMOTE_FILE}'" || return 1
  remote "sed -i '189i\\                with torch.inference_mode():' '${REMOTE_FILE}'" || return 1

  # Indent predict_df() call (add 4 spaces)
  remote "sed -i '190,197s/^/    /' '${REMOTE_FILE}'" || return 1
}

# Check that both inserted statements are present and that the edited file
# is still syntactically valid Python. Returns non-zero on the first failure.
verify_patch() {
  remote "grep -F -A 2 'model.eval()' '${REMOTE_FILE}'" || {
    echo 'ERROR: model.eval() not found' >&2
    return 1
  }
  remote "grep -F -A 2 'inference_mode()' '${REMOTE_FILE}'" || {
    echo 'ERROR: inference_mode() not found' >&2
    return 1
  }
  # Line-number edits silently land in the wrong place if the file differs
  # from what the script expects; parsing it catches broken indentation
  # before the running app is killed. Read as bytes so the remote locale
  # does not matter.
  remote "python -c 'import ast, sys; ast.parse(open(sys.argv[1], \"rb\").read())' '${REMOTE_FILE}'" || {
    echo 'ERROR: patched file is not valid Python' >&2
    return 1
  }
}

[[ -n "${SSH_TARGET}" ]] \
  || die "Usage: ${0##*/} user@host   (or set SSH_TARGET=user@host)"

echo "[1/5] Testing SSH connection..."
remote "echo 'SSH OK' && pwd"

echo ""
patched_now=0
if remote "grep -qF -e 'self._pipeline.model.eval()' -e 'with torch.inference_mode():' '${REMOTE_FILE}'"; then
  # Already (at least partly) patched: leave the file and the backup alone and
  # let the verification step decide whether the result is complete.
  echo "[2/5] Optimizations already present - keeping existing backup"
  echo ""
  echo "[3/5] Skipping patch (already applied)"
else
  echo "[2/5] Backing up current file..."
  remote "cp '${REMOTE_FILE}' '${REMOTE_BACKUP}'"

  echo ""
  echo "[3/5] Applying memory optimizations..."
  if ! apply_patch; then
    restore_backup || echo "WARNING: could not restore ${REMOTE_BACKUP}" >&2
    die "[FAILED] Could not apply edits - app was not restarted"
  fi
  patched_now=1
fi

echo ""
echo "[4/5] Verifying changes..."
if ! verify_patch; then
  # Only roll back edits made by this run; an older backup may predate
  # unrelated changes.
  if ((patched_now)); then
    echo "Restoring backup..." >&2
    restore_backup || echo "WARNING: could not restore ${REMOTE_BACKUP}" >&2
  fi
  die "[FAILED] Verification failed - app was not restarted"
fi

echo ""
echo "[5/5] Restarting Gradio app..."
# The bracket keeps the pattern from matching the remote shell that runs this
# very command line (pkill -f matches full command lines). "|| true" is
# deliberate: pkill exits non-zero when no app is running.
remote "pkill -f '[a]pp\\.py' || true"
sleep 3
# stdin is redirected as well so the background process holds none of the ssh
# session's streams open.
remote "cd '${REMOTE_APP_DIR}' && nohup python app.py > /tmp/gradio.log 2>&1 < /dev/null &"

echo ""
echo "[SUCCESS] Memory optimizations deployed!"
echo "[INFO] App restarting - test in 30 seconds"
echo ""
echo "Test with:"
echo "  python test_api.py"