"""
System Metrics API - Real-time system monitoring with actual metrics.

Provides CPU, memory, uptime, request rate, response time, and error rate.
All metrics are REAL and measured, no fake data.
"""
|
|
import asyncio
import logging
import time
from datetime import datetime
from typing import Dict, Any, Optional

import psutil
from fastapi import APIRouter, HTTPException
from pydantic import BaseModel
|
|
|
|
|
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Router that the system metrics/health/info endpoints below register on.
router = APIRouter()
|
|
|
|
|
|
|
|
class MetricsTracker:
    """Track request metrics for real-time monitoring.

    Keeps lifetime request/error counters, a bounded sample of the most
    recent response times, and the timestamps of requests and errors seen
    within the trailing 60-second window.

    Attributes:
        start_time: ``time.time()`` value captured when the tracker was created.
        request_count: Total number of requests recorded.
        error_count: Total number of requests recorded with ``is_error=True``.
        response_times: Most recent response times in milliseconds, oldest
            first, holding at most ``max_response_times`` entries.
        max_response_times: Upper bound on ``len(response_times)``.
        last_minute_requests: Timestamps of requests inside the window.
        last_minute_errors: Timestamps of error requests inside the window.
    """

    # Length, in seconds, of the sliding window behind the "last minute" lists.
    _WINDOW_SECONDS = 60

    def __init__(self):
        self.start_time = time.time()
        self.request_count = 0
        self.error_count = 0
        self.response_times = []
        self.max_response_times = 100
        self.last_minute_requests = []
        self.last_minute_errors = []

    def _prune(self, now: float) -> None:
        """Drop request and error timestamps that have left the window."""
        window = self._WINDOW_SECONDS
        self.last_minute_requests = [
            t for t in self.last_minute_requests if now - t < window
        ]
        self.last_minute_errors = [
            t for t in self.last_minute_errors if now - t < window
        ]

    def record_request(self, response_time_ms: float, is_error: bool = False) -> None:
        """Record a request with its response time.

        Args:
            response_time_ms: Response time of the request in milliseconds.
            is_error: Whether the request should count as an error.
        """
        now = time.time()

        self.request_count += 1
        self.last_minute_requests.append(now)
        if is_error:
            self.error_count += 1
            self.last_minute_errors.append(now)

        self.response_times.append(response_time_ms)
        # Trim by slice rather than a single pop(0) so the list also shrinks
        # correctly if max_response_times was lowered after construction.
        overflow = len(self.response_times) - self.max_response_times
        if overflow > 0:
            del self.response_times[:overflow]

        self._prune(now)

    def get_requests_per_minute(self) -> int:
        """Get number of requests in the last minute.

        Stale timestamps are pruned here as well as on write; otherwise the
        count would stay frozen at its last value once traffic stops.
        """
        self._prune(time.time())
        return len(self.last_minute_requests)

    def get_average_response_time(self) -> float:
        """Get average response time in milliseconds (0.0 when no samples)."""
        if not self.response_times:
            return 0.0
        return sum(self.response_times) / len(self.response_times)

    def get_error_rate(self) -> float:
        """Get the lifetime error rate as a percentage (0.0 when no requests)."""
        if self.request_count == 0:
            return 0.0
        return (self.error_count / self.request_count) * 100

    def get_uptime(self) -> int:
        """Get whole seconds elapsed since the tracker was created."""
        return int(time.time() - self.start_time)
|
|
|
|
|
|
|
|
|
|
|
# Lazily created module-wide tracker; stays None until first requested.
_metrics_tracker: Optional[MetricsTracker] = None


def get_metrics_tracker() -> MetricsTracker:
    """Get or create the global metrics tracker.

    Returns:
        The module-wide ``MetricsTracker``; it is created on the first call
        and the same instance is returned on every later call.
    """
    global _metrics_tracker
    if _metrics_tracker is None:
        _metrics_tracker = MetricsTracker()
    return _metrics_tracker
|
|
|
|
|
|
|
|
|
|
|
class SystemMetricsResponse(BaseModel):
    """System metrics response model returned by ``/api/system/metrics``."""

    # CPU usage percentage.
    cpu: float
    # Memory figures; the metrics endpoint fills "used" and "total" (in MB)
    # and "percent".
    memory: Dict[str, float]
    # Uptime in seconds.
    uptime: int
    # Number of requests recorded in the last minute.
    requests_per_min: int
    # Average response time in milliseconds.
    avg_response_ms: float
    # Error rate as a percentage.
    error_rate: float
    # Unix timestamp (seconds) at which the response was built.
    timestamp: int
    # "ok" by default; the metrics endpoint sets "degraded" when collection fails.
    status: str = "ok"
|
|
|
|
|
|
|
|
@router.get("/api/system/metrics", response_model=SystemMetricsResponse)
async def get_system_metrics():
    """
    Get real-time system metrics

    Returns:
    - cpu: CPU usage percentage (0-100)
    - memory: Memory usage (used and total in MB, plus percent)
    - uptime: Seconds since the metrics tracker was created
    - requests_per_min: Number of requests in the last minute
    - avg_response_ms: Average response time in milliseconds
    - error_rate: Error rate as percentage
    - timestamp: Current Unix timestamp
    - status: "ok", or "degraded" when metric collection failed

    If collection raises, the failure is logged with its traceback and a
    zero-filled response with status "degraded" is returned instead of an
    HTTP error.
    """
    try:
        tracker = get_metrics_tracker()

        # cpu_percent(interval=0.1) blocks for the sampling interval, so run
        # it in the default executor instead of stalling the event loop.
        loop = asyncio.get_running_loop()
        cpu_percent = await loop.run_in_executor(
            None, lambda: psutil.cpu_percent(interval=0.1)
        )

        memory = psutil.virtual_memory()
        bytes_per_mb = 1024 * 1024

        return SystemMetricsResponse(
            cpu=round(cpu_percent, 2),
            memory={
                "used": round(memory.used / bytes_per_mb, 2),
                "total": round(memory.total / bytes_per_mb, 2),
                "percent": round(memory.percent, 2),
            },
            uptime=tracker.get_uptime(),
            requests_per_min=tracker.get_requests_per_minute(),
            avg_response_ms=round(tracker.get_average_response_time(), 2),
            error_rate=round(tracker.get_error_rate(), 2),
            timestamp=int(time.time()),
            status="ok",
        )

    except Exception as e:
        # Deliberate best-effort boundary: keep the endpoint answering, but
        # record the traceback so the failure can be diagnosed.
        logger.exception("Failed to get system metrics: %s", e)

        return SystemMetricsResponse(
            cpu=0.0,
            memory={"used": 0.0, "total": 0.0, "percent": 0.0},
            uptime=0,
            requests_per_min=0,
            avg_response_ms=0.0,
            error_rate=0.0,
            timestamp=int(time.time()),
            status="degraded",
        )
|
|
|
|
|
|
|
|
@router.get("/api/system/health")
async def get_system_health():
    """
    Get system health status

    Returns basic health information for monitoring: a status of "healthy"
    or "warning", the CPU and memory percentages, uptime, the list of
    detected issues, and a Unix timestamp. A "warning" is raised when CPU
    or memory usage exceeds 90% or the error rate exceeds 10%.

    If collection raises, the failure is logged with its traceback and a
    dict with status "error" and the exception text is returned.
    """
    try:
        tracker = get_metrics_tracker()

        # cpu_percent(interval=0.1) blocks for the sampling interval, so run
        # it in the default executor instead of stalling the event loop.
        loop = asyncio.get_running_loop()
        cpu_percent = await loop.run_in_executor(
            None, lambda: psutil.cpu_percent(interval=0.1)
        )
        memory = psutil.virtual_memory()

        # (condition tripped, issue label) pairs, in reporting order.
        checks = (
            (cpu_percent > 90, "High CPU usage"),
            (memory.percent > 90, "High memory usage"),
            (tracker.get_error_rate() > 10, "High error rate"),
        )
        issues = [label for tripped, label in checks if tripped]
        status = "warning" if issues else "healthy"

        return {
            "status": status,
            "cpu_percent": round(cpu_percent, 2),
            "memory_percent": round(memory.percent, 2),
            "uptime": tracker.get_uptime(),
            "issues": issues,
            "timestamp": int(time.time()),
        }

    except Exception as e:
        # Best-effort boundary: log with traceback, answer with an error body.
        logger.exception("Failed to get system health: %s", e)
        return {
            "status": "error",
            "error": str(e),
            "timestamp": int(time.time()),
        }
|
|
|
|
|
|
|
|
@router.get("/api/system/info")
async def get_system_info():
    """
    Get static system information

    Returns system configuration and details: platform name, release,
    version, architecture, processor, Python version, physical and logical
    CPU counts, total memory in GB, and a Unix timestamp.

    If collection raises, the failure is logged with its traceback and a
    dict with the exception text and a timestamp is returned.
    """
    try:
        import platform

        logical_cpus = psutil.cpu_count(logical=True)
        # psutil.cpu_count() defaults to logical=True, which made "cpu_count"
        # a duplicate of "cpu_count_logical". Report physical cores instead,
        # falling back to the logical count when psutil cannot determine them
        # (it returns None in that case).
        physical_cpus = psutil.cpu_count(logical=False) or logical_cpus

        return {
            "platform": platform.system(),
            "platform_release": platform.release(),
            "platform_version": platform.version(),
            "architecture": platform.machine(),
            "processor": platform.processor(),
            "python_version": platform.python_version(),
            "cpu_count": physical_cpus,
            "cpu_count_logical": logical_cpus,
            "memory_total_gb": round(psutil.virtual_memory().total / (1024**3), 2),
            "timestamp": int(time.time()),
        }

    except Exception as e:
        # Best-effort boundary: log with traceback, answer with an error body.
        logger.exception("Failed to get system info: %s", e)
        return {
            "error": str(e),
            "timestamp": int(time.time()),
        }
|
|
|