Initial commit with CI workflow

- Flask backend with SSH discovery and Claude AI integration - React/Vite frontend with Tailwind CSS - Docker multi-stage build - Gitea Actions workflow for container builds 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-04 21:38:50 +00:00
commit ea49143a13
31 changed files with 3037 additions and 0 deletions
--- a/backend/services/init.py
+++ b/backend/services/init.py
@@ -0,0 +1 @@
+# Services package
--- a/backend/services/claude_agent.py
+++ b/backend/services/claude_agent.py
@@ -0,0 +1,273 @@
+import json
+from typing import Optional
+from dataclasses import dataclass
+
+from anthropic import Anthropic
+
+from config import get_config
+
+
+SYSTEM_PROMPT = """You are an intelligent monitoring configuration assistant for Uptime Kuma. Your role is to analyze system information from hosts and recommend what should be monitored.
+
+## Your Capabilities
+1. Analyze host scan results (OS info, running services, Docker containers, open ports)
+2. Suggest monitors to create in Uptime Kuma
+3. Request additional SSH commands to gather more information when needed
+4. Explain your monitoring recommendations
+
+## Rules for Suggestions
+1. **Always explain WHY** you want to monitor something - what failure would it detect?
+2. **Be specific** with monitor configurations (ports, paths, intervals)
+3. **Prioritize critical services** - databases, web servers, auth services come first
+4. **Suggest appropriate intervals** based on criticality:
+   - Critical services (databases, auth): 30-60 seconds
+   - Web services: 60-120 seconds
+   - Background jobs: 300 seconds
+5. **Look for health endpoints** - prefer /health, /healthz, /status over root paths
+6. **Consider dependencies** - if a service depends on another, both should be monitored
+
+## Rules for SSH Commands
+When you need more information, you can request SSH commands. Follow these rules:
+1. **Read-only only** - never suggest commands that modify the system
+2. **Be specific** - explain exactly what information you need and why
+3. **Safe commands only** - no sudo unless absolutely necessary for reading
+4. **Examples of acceptable commands:**
+   - `curl -s localhost:8080/health` - check if a service responds
+   - `cat /etc/nginx/nginx.conf` - read configuration
+   - `docker inspect <container>` - get container details
+   - `systemctl status <service>` - check service status
+
+## Response Format
+Always respond with valid JSON in this structure:
+{
+    "analysis": "Your analysis of what you found on the host",
+    "monitors": [
+        {
+            "type": "http|tcp|ping|docker|keyword",
+            "name": "Human-readable monitor name",
+            "target": "URL, hostname, or container name",
+            "port": 80,
+            "interval": 60,
+            "reason": "Why this should be monitored"
+        }
+    ],
+    "additional_commands": [
+        {
+            "command": "the SSH command to run",
+            "reason": "why you need this information"
+        }
+    ],
+    "questions": ["Any questions for the user about what to monitor"]
+}
+
+## Monitor Types
+- **http**: Web endpoints (provide full URL with protocol)
+- **tcp**: Port connectivity (provide hostname and port)
+- **ping**: Host availability (provide hostname)
+- **docker**: Docker container status (provide container name)
+- **keyword**: Check for specific text in response (provide URL and keyword)
+
+Be thorough but not excessive. Quality over quantity - suggest monitors that will actually catch real problems."""
+
+
+@dataclass
+class MonitorSuggestion:
+    """A suggested monitor configuration."""
+
+    type: str
+    name: str
+    target: str
+    port: Optional[int] = None
+    interval: int = 60
+    reason: str = ""
+    keyword: Optional[str] = None
+
+
+@dataclass
+class CommandRequest:
+    """A request to run an SSH command."""
+
+    command: str
+    reason: str
+
+
+@dataclass
+class AgentResponse:
+    """Response from the Claude agent."""
+
+    analysis: str
+    monitors: list[MonitorSuggestion]
+    additional_commands: list[CommandRequest]
+    questions: list[str]
+    raw_response: str
+
+
+class ClaudeAgent:
+    """Claude AI agent for intelligent monitoring suggestions."""
+
+    def __init__(self):
+        config = get_config()
+        self.client = Anthropic(api_key=config.claude_api_key)
+        self.conversation_history: list[dict] = []
+
+    def analyze_host(self, scan_results: dict, hostname: str) -> AgentResponse:
+        """Analyze host scan results and suggest monitors."""
+        user_message = f"""I've scanned the host '{hostname}' and gathered the following information:
+
+## System Information
+```
+{scan_results.get('system_info', 'Not available')}
+```
+
+## OS Release
+```
+{scan_results.get('os_release', 'Not available')}
+```
+
+## Running Docker Containers
+```
+{scan_results.get('docker_containers', 'No Docker or no containers running')}
+```
+
+## Running Systemd Services
+```
+{scan_results.get('systemd_services', 'Not available')}
+```
+
+## Disk Usage
+```
+{scan_results.get('disk_usage', 'Not available')}
+```
+
+## Memory Usage
+```
+{scan_results.get('memory_usage', 'Not available')}
+```
+
+## CPU Info
+```
+{scan_results.get('cpu_count', 'Not available')} CPU cores
+```
+
+## Open Ports (Listening)
+```
+{scan_results.get('open_ports', 'Not available')}
+```
+
+Please analyze this information and suggest what should be monitored in Uptime Kuma.
+Respond with JSON as specified in your instructions."""
+
+        return self._send_message(user_message)
+
+    def process_command_results(self, command: str, result: str) -> AgentResponse:
+        """Process the results of an additional SSH command."""
+        user_message = f"""Here are the results of the command you requested:
+
+Command: `{command}`
+
+Output:
+```
+{result}
+```
+
+Please update your analysis and suggestions based on this new information.
+Respond with JSON as specified in your instructions."""
+
+        return self._send_message(user_message)
+
+    def answer_question(self, question: str, answer: str) -> AgentResponse:
+        """Process user's answer to a question."""
+        user_message = f"""You asked: "{question}"
+
+The user responded: "{answer}"
+
+Please update your recommendations based on this information.
+Respond with JSON as specified in your instructions."""
+
+        return self._send_message(user_message)
+
+    def _send_message(self, user_message: str) -> AgentResponse:
+        """Send a message to Claude and parse the response."""
+        self.conversation_history.append({"role": "user", "content": user_message})
+
+        response = self.client.messages.create(
+            model="claude-sonnet-4-20250514",
+            max_tokens=4096,
+            system=SYSTEM_PROMPT,
+            messages=self.conversation_history,
+        )
+
+        assistant_message = response.content[0].text
+        self.conversation_history.append({"role": "assistant", "content": assistant_message})
+
+        return self._parse_response(assistant_message)
+
+    def _parse_response(self, response_text: str) -> AgentResponse:
+        """Parse Claude's JSON response."""
+        # Try to extract JSON from the response
+        try:
+            # Look for JSON block in the response
+            json_start = response_text.find("{")
+            json_end = response_text.rfind("}") + 1
+
+            if json_start != -1 and json_end > json_start:
+                json_str = response_text[json_start:json_end]
+                data = json.loads(json_str)
+            else:
+                # No JSON found, return empty response
+                return AgentResponse(
+                    analysis=response_text,
+                    monitors=[],
+                    additional_commands=[],
+                    questions=[],
+                    raw_response=response_text,
+                )
+
+            monitors = []
+            for m in data.get("monitors", []):
+                monitors.append(
+                    MonitorSuggestion(
+                        type=m.get("type", "http"),
+                        name=m.get("name", "Unknown"),
+                        target=m.get("target", ""),
+                        port=m.get("port"),
+                        interval=m.get("interval", 60),
+                        reason=m.get("reason", ""),
+                        keyword=m.get("keyword"),
+                    )
+                )
+
+            commands = []
+            for c in data.get("additional_commands", []):
+                commands.append(
+                    CommandRequest(
+                        command=c.get("command", ""),
+                        reason=c.get("reason", ""),
+                    )
+                )
+
+            return AgentResponse(
+                analysis=data.get("analysis", ""),
+                monitors=monitors,
+                additional_commands=commands,
+                questions=data.get("questions", []),
+                raw_response=response_text,
+            )
+
+        except json.JSONDecodeError:
+            return AgentResponse(
+                analysis=response_text,
+                monitors=[],
+                additional_commands=[],
+                questions=[],
+                raw_response=response_text,
+            )
+
+    def reset_conversation(self) -> None:
+        """Reset the conversation history."""
+        self.conversation_history = []
+
+
+def create_agent() -> ClaudeAgent:
+    """Create a new Claude agent instance."""
+    return ClaudeAgent()
--- a/backend/services/discovery.py
+++ b/backend/services/discovery.py
@@ -0,0 +1,205 @@
+from dataclasses import dataclass
+from typing import Optional, Callable
+import uuid
+
+from services.ssh_manager import get_ssh_manager, CommandResult
+
+
+# Built-in safe commands that never require approval
+SAFE_DISCOVERY_COMMANDS = {
+    "system_info": "uname -a",
+    "os_release": "cat /etc/os-release 2>/dev/null || cat /etc/*-release 2>/dev/null | head -20",
+    "docker_containers": "docker ps --format '{{.ID}}\\t{{.Names}}\\t{{.Image}}\\t{{.Status}}\\t{{.Ports}}' 2>/dev/null || echo 'Docker not available'",
+    "systemd_services": "systemctl list-units --type=service --state=running --no-pager 2>/dev/null | head -50 || echo 'Systemd not available'",
+    "disk_usage": "df -h 2>/dev/null | head -20",
+    "memory_usage": "free -h 2>/dev/null || cat /proc/meminfo 2>/dev/null | head -10",
+    "cpu_count": "nproc 2>/dev/null || grep -c processor /proc/cpuinfo 2>/dev/null || echo 'Unknown'",
+    "open_ports": "ss -tlnp 2>/dev/null || netstat -tlnp 2>/dev/null | head -30 || echo 'Unable to list ports'",
+}
+
+
+@dataclass
+class DiscoveryResult:
+    """Results from a host discovery scan."""
+
+    scan_id: str
+    hostname: str
+    username: str
+    port: int
+    connected: bool
+    error: Optional[str] = None
+    system_info: str = ""
+    os_release: str = ""
+    docker_containers: str = ""
+    systemd_services: str = ""
+    disk_usage: str = ""
+    memory_usage: str = ""
+    cpu_count: str = ""
+    open_ports: str = ""
+
+    def to_dict(self) -> dict:
+        """Convert to dictionary for JSON serialization."""
+        return {
+            "scan_id": self.scan_id,
+            "hostname": self.hostname,
+            "username": self.username,
+            "port": self.port,
+            "connected": self.connected,
+            "error": self.error,
+            "system_info": self.system_info,
+            "os_release": self.os_release,
+            "docker_containers": self.docker_containers,
+            "systemd_services": self.systemd_services,
+            "disk_usage": self.disk_usage,
+            "memory_usage": self.memory_usage,
+            "cpu_count": self.cpu_count,
+            "open_ports": self.open_ports,
+        }
+
+
+class DiscoveryService:
+    """Service for discovering services on remote hosts."""
+
+    def __init__(self):
+        self.active_scans: dict[str, DiscoveryResult] = {}
+
+    def is_safe_command(self, command: str) -> bool:
+        """Check if a command is in the safe built-in list."""
+        # Check exact matches
+        if command in SAFE_DISCOVERY_COMMANDS.values():
+            return True
+
+        # Check if it's a safe read-only command pattern
+        safe_patterns = [
+            "cat ",
+            "head ",
+            "tail ",
+            "grep ",
+            "ls ",
+            "ps ",
+            "df ",
+            "free ",
+            "uname ",
+            "uptime",
+            "hostname",
+            "whoami",
+            "id ",
+            "docker ps",
+            "docker inspect",
+            "docker logs",
+            "systemctl status",
+            "systemctl list-",
+            "journalctl",
+            "ss ",
+            "netstat ",
+            "curl -s",
+            "wget -q",
+            "nproc",
+        ]
+
+        command_lower = command.lower().strip()
+        for pattern in safe_patterns:
+            if command_lower.startswith(pattern):
+                return True
+
+        return False
+
+    def scan_host(
+        self,
+        hostname: str,
+        username: str = "root",
+        port: int = 22,
+        on_progress: Optional[Callable[[str, str], None]] = None,
+    ) -> DiscoveryResult:
+        """
+        Scan a host for services and system information.
+
+        Args:
+            hostname: Target hostname or IP
+            username: SSH username
+            port: SSH port
+            on_progress: Callback for progress updates (command_name, status)
+
+        Returns:
+            DiscoveryResult with all gathered information
+        """
+        scan_id = str(uuid.uuid4())
+        result = DiscoveryResult(
+            scan_id=scan_id,
+            hostname=hostname,
+            username=username,
+            port=port,
+            connected=False,
+        )
+        self.active_scans[scan_id] = result
+
+        ssh = get_ssh_manager()
+
+        # Connect to host
+        if on_progress:
+            on_progress("connect", "connecting")
+
+        try:
+            ssh.connect(hostname, username, port)
+            result.connected = True
+            if on_progress:
+                on_progress("connect", "connected")
+        except Exception as e:
+            result.error = str(e)
+            if on_progress:
+                on_progress("connect", f"failed: {str(e)}")
+            return result
+
+        # Run discovery commands
+        for cmd_name, command in SAFE_DISCOVERY_COMMANDS.items():
+            if on_progress:
+                on_progress(cmd_name, "running")
+
+            try:
+                cmd_result = ssh.execute(hostname, command, username, port, timeout=30)
+                output = cmd_result.stdout if cmd_result.success else cmd_result.stderr
+                setattr(result, cmd_name, output.strip())
+
+                if on_progress:
+                    on_progress(cmd_name, "complete" if cmd_result.success else "failed")
+            except Exception as e:
+                setattr(result, cmd_name, f"Error: {str(e)}")
+                if on_progress:
+                    on_progress(cmd_name, f"error: {str(e)}")
+
+        return result
+
+    def run_additional_command(
+        self,
+        hostname: str,
+        command: str,
+        username: str = "root",
+        port: int = 22,
+    ) -> CommandResult:
+        """
+        Run an additional command on a host.
+
+        This should only be called after approval if in dev mode.
+        """
+        ssh = get_ssh_manager()
+
+        if not ssh.is_connected(hostname, username, port):
+            ssh.connect(hostname, username, port)
+
+        return ssh.execute(hostname, command, username, port, timeout=60)
+
+    def get_scan(self, scan_id: str) -> Optional[DiscoveryResult]:
+        """Get a scan result by ID."""
+        return self.active_scans.get(scan_id)
+
+
+# Global discovery service instance
+_discovery_service: Optional[DiscoveryService] = None
+
+
+def get_discovery_service() -> DiscoveryService:
+    """Get the global discovery service instance."""
+    global _discovery_service
+    if _discovery_service is None:
+        _discovery_service = DiscoveryService()
+    return _discovery_service
--- a/backend/services/kuma_client.py
+++ b/backend/services/kuma_client.py
@@ -0,0 +1,174 @@
+from typing import Optional
+from dataclasses import dataclass, asdict
+import requests
+
+from config import get_config
+
+
+@dataclass
+class Monitor:
+    """Uptime Kuma monitor configuration."""
+
+    type: str  # http, tcp, ping, docker, keyword
+    name: str
+    url: Optional[str] = None  # For HTTP monitors
+    hostname: Optional[str] = None  # For TCP/Ping monitors
+    port: Optional[int] = None  # For TCP monitors
+    interval: int = 60
+    keyword: Optional[str] = None  # For keyword monitors
+    docker_container: Optional[str] = None  # For Docker monitors
+    docker_host: Optional[str] = None  # For Docker monitors
+    retries: int = 3
+    retry_interval: int = 60
+    max_redirects: int = 10
+    accepted_statuscodes: list[str] = None
+    notification_id_list: Optional[list[int]] = None
+
+    def __post_init__(self):
+        if self.accepted_statuscodes is None:
+            self.accepted_statuscodes = ["200-299"]
+
+    def to_api_format(self) -> dict:
+        """Convert to Uptime Kuma API format."""
+        # Map our types to Kuma's type values
+        type_map = {
+            "http": "http",
+            "tcp": "port",
+            "ping": "ping",
+            "docker": "docker",
+            "keyword": "keyword",
+        }
+
+        data = {
+            "type": type_map.get(self.type, self.type),
+            "name": self.name,
+            "interval": self.interval,
+            "retries": self.retries,
+            "retryInterval": self.retry_interval,
+            "maxredirects": self.max_redirects,
+            "accepted_statuscodes": self.accepted_statuscodes,
+        }
+
+        if self.url:
+            data["url"] = self.url
+        if self.hostname:
+            data["hostname"] = self.hostname
+        if self.port:
+            data["port"] = self.port
+        if self.keyword:
+            data["keyword"] = self.keyword
+        if self.docker_container:
+            data["docker_container"] = self.docker_container
+        if self.docker_host:
+            data["docker_host"] = self.docker_host
+        if self.notification_id_list:
+            data["notificationIDList"] = self.notification_id_list
+
+        return data
+
+
+class UptimeKumaClient:
+    """Client for Uptime Kuma REST API."""
+
+    def __init__(self):
+        config = get_config()
+        self.base_url = config.uptime_kuma_url.rstrip("/")
+        self.api_key = config.uptime_kuma_api_key
+        self.session = requests.Session()
+        self.session.headers.update({
+            "Authorization": f"Bearer {self.api_key}",
+            "Content-Type": "application/json",
+        })
+
+    def _request(self, method: str, endpoint: str, **kwargs) -> dict:
+        """Make an API request."""
+        url = f"{self.base_url}/api{endpoint}"
+        response = self.session.request(method, url, **kwargs)
+        response.raise_for_status()
+        return response.json() if response.content else {}
+
+    def get_monitors(self) -> list[dict]:
+        """Get all monitors."""
+        try:
+            result = self._request("GET", "/monitors")
+            return result.get("monitors", [])
+        except Exception as e:
+            raise Exception(f"Failed to get monitors: {str(e)}")
+
+    def get_monitor(self, monitor_id: int) -> dict:
+        """Get a specific monitor."""
+        try:
+            result = self._request("GET", f"/monitors/{monitor_id}")
+            return result.get("monitor", {})
+        except Exception as e:
+            raise Exception(f"Failed to get monitor {monitor_id}: {str(e)}")
+
+    def create_monitor(self, monitor: Monitor) -> dict:
+        """Create a new monitor."""
+        try:
+            data = monitor.to_api_format()
+            result = self._request("POST", "/monitors", json=data)
+            return result
+        except Exception as e:
+            raise Exception(f"Failed to create monitor: {str(e)}")
+
+    def update_monitor(self, monitor_id: int, monitor: Monitor) -> dict:
+        """Update an existing monitor."""
+        try:
+            data = monitor.to_api_format()
+            result = self._request("PUT", f"/monitors/{monitor_id}", json=data)
+            return result
+        except Exception as e:
+            raise Exception(f"Failed to update monitor {monitor_id}: {str(e)}")
+
+    def delete_monitor(self, monitor_id: int) -> dict:
+        """Delete a monitor."""
+        try:
+            result = self._request("DELETE", f"/monitors/{monitor_id}")
+            return result
+        except Exception as e:
+            raise Exception(f"Failed to delete monitor {monitor_id}: {str(e)}")
+
+    def pause_monitor(self, monitor_id: int) -> dict:
+        """Pause a monitor."""
+        try:
+            result = self._request("POST", f"/monitors/{monitor_id}/pause")
+            return result
+        except Exception as e:
+            raise Exception(f"Failed to pause monitor {monitor_id}: {str(e)}")
+
+    def resume_monitor(self, monitor_id: int) -> dict:
+        """Resume a paused monitor."""
+        try:
+            result = self._request("POST", f"/monitors/{monitor_id}/resume")
+            return result
+        except Exception as e:
+            raise Exception(f"Failed to resume monitor {monitor_id}: {str(e)}")
+
+    def get_status(self) -> dict:
+        """Get Uptime Kuma status/info."""
+        try:
+            result = self._request("GET", "/status-page")
+            return result
+        except Exception as e:
+            raise Exception(f"Failed to get status: {str(e)}")
+
+    def test_connection(self) -> bool:
+        """Test connection to Uptime Kuma."""
+        try:
+            self._request("GET", "/monitors")
+            return True
+        except Exception:
+            return False
+
+
+# Global client instance
+_kuma_client: Optional[UptimeKumaClient] = None
+
+
+def get_kuma_client() -> UptimeKumaClient:
+    """Get the global Uptime Kuma client instance."""
+    global _kuma_client
+    if _kuma_client is None:
+        _kuma_client = UptimeKumaClient()
+    return _kuma_client
--- a/backend/services/monitors.py
+++ b/backend/services/monitors.py
@@ -0,0 +1,249 @@
+from dataclasses import dataclass
+from typing import Optional
+
+from services.kuma_client import get_kuma_client, Monitor
+from services.claude_agent import MonitorSuggestion
+
+
+@dataclass
+class DefaultMonitorProfile:
+    """A default monitoring profile that doesn't require approval."""
+
+    name: str
+    description: str
+    monitors: list[Monitor]
+
+
+def create_host_health_monitors(hostname: str, ssh_port: int = 22) -> list[Monitor]:
+    """Create default host health monitors."""
+    return [
+        Monitor(
+            type="ping",
+            name=f"{hostname} - Ping",
+            hostname=hostname,
+            interval=60,
+        ),
+        Monitor(
+            type="tcp",
+            name=f"{hostname} - SSH",
+            hostname=hostname,
+            port=ssh_port,
+            interval=120,
+        ),
+    ]
+
+
+def create_web_server_monitors(hostname: str, port: int = 80, https: bool = False) -> list[Monitor]:
+    """Create monitors for a detected web server."""
+    protocol = "https" if https else "http"
+    return [
+        Monitor(
+            type="http",
+            name=f"{hostname} - Web ({port})",
+            url=f"{protocol}://{hostname}:{port}/",
+            interval=60,
+        ),
+    ]
+
+
+def create_docker_container_monitors(hostname: str, containers: list[dict]) -> list[Monitor]:
+    """Create monitors for detected Docker containers."""
+    monitors = []
+    for container in containers:
+        name = container.get("name", container.get("id", "unknown"))
+        monitors.append(
+            Monitor(
+                type="docker",
+                name=f"{hostname} - Container: {name}",
+                docker_container=name,
+                docker_host=hostname,
+                interval=60,
+            )
+        )
+    return monitors
+
+
+class MonitorService:
+    """Service for managing monitors in Uptime Kuma."""
+
+    def __init__(self):
+        self.created_monitors: list[dict] = []
+
+    def create_default_monitors(
+        self,
+        hostname: str,
+        ssh_port: int = 22,
+        has_docker: bool = False,
+        containers: Optional[list[dict]] = None,
+        web_ports: Optional[list[int]] = None,
+    ) -> list[dict]:
+        """
+        Create default monitors for a host.
+        These are built-in and never require approval.
+        """
+        kuma = get_kuma_client()
+        created = []
+
+        # Host health monitors
+        health_monitors = create_host_health_monitors(hostname, ssh_port)
+        for monitor in health_monitors:
+            try:
+                result = kuma.create_monitor(monitor)
+                created.append({
+                    "monitor": monitor.name,
+                    "type": monitor.type,
+                    "status": "created",
+                    "result": result,
+                })
+            except Exception as e:
+                created.append({
+                    "monitor": monitor.name,
+                    "type": monitor.type,
+                    "status": "failed",
+                    "error": str(e),
+                })
+
+        # Web server monitors
+        if web_ports:
+            for port in web_ports:
+                https = port == 443 or port == 8443
+                web_monitors = create_web_server_monitors(hostname, port, https)
+                for monitor in web_monitors:
+                    try:
+                        result = kuma.create_monitor(monitor)
+                        created.append({
+                            "monitor": monitor.name,
+                            "type": monitor.type,
+                            "status": "created",
+                            "result": result,
+                        })
+                    except Exception as e:
+                        created.append({
+                            "monitor": monitor.name,
+                            "type": monitor.type,
+                            "status": "failed",
+                            "error": str(e),
+                        })
+
+        # Docker container monitors
+        if has_docker and containers:
+            docker_monitors = create_docker_container_monitors(hostname, containers)
+            for monitor in docker_monitors:
+                try:
+                    result = kuma.create_monitor(monitor)
+                    created.append({
+                        "monitor": monitor.name,
+                        "type": monitor.type,
+                        "status": "created",
+                        "result": result,
+                    })
+                except Exception as e:
+                    created.append({
+                        "monitor": monitor.name,
+                        "type": monitor.type,
+                        "status": "failed",
+                        "error": str(e),
+                    })
+
+        self.created_monitors.extend(created)
+        return created
+
+    def create_from_suggestion(self, suggestion: MonitorSuggestion, hostname: str) -> dict:
+        """
+        Create a monitor from a Claude suggestion.
+        In production mode, this executes automatically.
+        In dev mode, this should only be called after approval.
+        """
+        kuma = get_kuma_client()
+
+        # Build monitor from suggestion
+        monitor = Monitor(
+            type=suggestion.type,
+            name=suggestion.name,
+            interval=suggestion.interval,
+        )
+
+        # Set type-specific fields
+        if suggestion.type == "http" or suggestion.type == "keyword":
+            monitor.url = suggestion.target
+            if suggestion.keyword:
+                monitor.keyword = suggestion.keyword
+        elif suggestion.type == "tcp":
+            monitor.hostname = suggestion.target
+            monitor.port = suggestion.port
+        elif suggestion.type == "ping":
+            monitor.hostname = suggestion.target
+        elif suggestion.type == "docker":
+            monitor.docker_container = suggestion.target
+            monitor.docker_host = hostname
+
+        try:
+            result = kuma.create_monitor(monitor)
+            return {
+                "monitor": monitor.name,
+                "type": monitor.type,
+                "status": "created",
+                "result": result,
+                "reason": suggestion.reason,
+            }
+        except Exception as e:
+            return {
+                "monitor": monitor.name,
+                "type": monitor.type,
+                "status": "failed",
+                "error": str(e),
+                "reason": suggestion.reason,
+            }
+
+    def get_existing_monitors(self) -> list[dict]:
+        """Get all existing monitors from Uptime Kuma."""
+        kuma = get_kuma_client()
+        return kuma.get_monitors()
+
+
+def parse_web_ports_from_scan(open_ports: str) -> list[int]:
+    """Extract web server ports from port scan output."""
+    common_web_ports = [80, 443, 8080, 8443, 3000, 5000, 8000]
+    found_ports = []
+
+    for port in common_web_ports:
+        if f":{port}" in open_ports or f" {port} " in open_ports:
+            found_ports.append(port)
+
+    return found_ports
+
+
+def parse_docker_containers_from_scan(docker_output: str) -> list[dict]:
+    """Parse Docker container info from scan output."""
+    containers = []
+
+    if "Docker not available" in docker_output or not docker_output.strip():
+        return containers
+
+    for line in docker_output.strip().split("\n"):
+        if not line.strip():
+            continue
+
+        parts = line.split("\t")
+        if len(parts) >= 2:
+            containers.append({
+                "id": parts[0] if len(parts) > 0 else "",
+                "name": parts[1] if len(parts) > 1 else "",
+                "image": parts[2] if len(parts) > 2 else "",
+                "status": parts[3] if len(parts) > 3 else "",
+                "ports": parts[4] if len(parts) > 4 else "",
+            })
+
+    return containers
+
+
+# Global monitor service instance
+_monitor_service: Optional[MonitorService] = None
+
+
+def get_monitor_service() -> MonitorService:
+    """Get the global monitor service instance."""
+    global _monitor_service
+    if _monitor_service is None:
+        _monitor_service = MonitorService()
+    return _monitor_service
--- a/backend/services/ssh_manager.py
+++ b/backend/services/ssh_manager.py
@@ -0,0 +1,175 @@
+import io
+import threading
+from typing import Optional, Callable
+from dataclasses import dataclass
+
+import paramiko
+
+from config import get_config
+
+
+@dataclass
+class CommandResult:
+    """Result of an SSH command execution."""
+
+    stdout: str
+    stderr: str
+    exit_code: int
+    success: bool
+
+
+class SSHManager:
+    """Manages SSH connections to target hosts."""
+
+    def __init__(self):
+        self._connections: dict[str, paramiko.SSHClient] = {}
+        self._lock = threading.Lock()
+
+    def _get_private_key(self) -> paramiko.PKey:
+        """Parse the private key from config."""
+        config = get_config()
+        key_data = config.ssh_private_key
+
+        # Try different key formats
+        key_file = io.StringIO(key_data)
+
+        for key_class in [paramiko.RSAKey, paramiko.Ed25519Key, paramiko.ECDSAKey]:
+            try:
+                key_file.seek(0)
+                return key_class.from_private_key(key_file)
+            except Exception:
+                continue
+
+        raise ValueError("Unable to parse SSH private key. Supported formats: RSA, Ed25519, ECDSA")
+
+    def connect(self, hostname: str, username: str = "root", port: int = 22) -> bool:
+        """Establish SSH connection to a host."""
+        connection_key = f"{username}@{hostname}:{port}"
+
+        with self._lock:
+            if connection_key in self._connections:
+                # Test if connection is still alive
+                try:
+                    transport = self._connections[connection_key].get_transport()
+                    if transport and transport.is_active():
+                        return True
+                except Exception:
+                    pass
+                # Remove dead connection
+                self._connections.pop(connection_key, None)
+
+            try:
+                client = paramiko.SSHClient()
+                client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
+
+                private_key = self._get_private_key()
+                client.connect(
+                    hostname=hostname,
+                    port=port,
+                    username=username,
+                    pkey=private_key,
+                    timeout=30,
+                    allow_agent=False,
+                    look_for_keys=False,
+                )
+                self._connections[connection_key] = client
+                return True
+            except Exception as e:
+                raise ConnectionError(f"Failed to connect to {connection_key}: {str(e)}")
+
+    def execute(
+        self,
+        hostname: str,
+        command: str,
+        username: str = "root",
+        port: int = 22,
+        timeout: int = 60,
+        on_output: Optional[Callable[[str], None]] = None,
+    ) -> CommandResult:
+        """Execute a command on a remote host."""
+        connection_key = f"{username}@{hostname}:{port}"
+
+        with self._lock:
+            client = self._connections.get(connection_key)
+            if not client:
+                raise ConnectionError(f"Not connected to {connection_key}. Call connect() first.")
+
+        try:
+            stdin, stdout, stderr = client.exec_command(command, timeout=timeout)
+
+            # Read output
+            stdout_data = ""
+            stderr_data = ""
+
+            # Stream stdout if callback provided
+            if on_output:
+                for line in stdout:
+                    stdout_data += line
+                    on_output(line.rstrip("\n"))
+            else:
+                stdout_data = stdout.read().decode("utf-8", errors="replace")
+
+            stderr_data = stderr.read().decode("utf-8", errors="replace")
+            exit_code = stdout.channel.recv_exit_status()
+
+            return CommandResult(
+                stdout=stdout_data,
+                stderr=stderr_data,
+                exit_code=exit_code,
+                success=exit_code == 0,
+            )
+        except Exception as e:
+            return CommandResult(
+                stdout="",
+                stderr=str(e),
+                exit_code=-1,
+                success=False,
+            )
+
+    def disconnect(self, hostname: str, username: str = "root", port: int = 22) -> None:
+        """Close SSH connection to a host."""
+        connection_key = f"{username}@{hostname}:{port}"
+
+        with self._lock:
+            client = self._connections.pop(connection_key, None)
+            if client:
+                try:
+                    client.close()
+                except Exception:
+                    pass
+
+    def disconnect_all(self) -> None:
+        """Close all SSH connections."""
+        with self._lock:
+            for client in self._connections.values():
+                try:
+                    client.close()
+                except Exception:
+                    pass
+            self._connections.clear()
+
+    def is_connected(self, hostname: str, username: str = "root", port: int = 22) -> bool:
+        """Check if connected to a host."""
+        connection_key = f"{username}@{hostname}:{port}"
+
+        with self._lock:
+            client = self._connections.get(connection_key)
+            if not client:
+                return False
+            try:
+                transport = client.get_transport()
+                return transport is not None and transport.is_active()
+            except Exception:
+                return False
+
+
+# Global SSH manager instance
+_ssh_manager: Optional[SSHManager] = None
+
+
+def get_ssh_manager() -> SSHManager:
+    """Get the global SSH manager instance."""
+    global _ssh_manager
+    if _ssh_manager is None:
+        _ssh_manager = SSHManager()
+    return _ssh_manager