# kuma-strapper/backend/services/claude_agent.py

import json
from typing import Optional
from dataclasses import dataclass

from anthropic import Anthropic

from config import get_config


# System prompt sent with every request made by ClaudeAgent._send_message.
# It defines the assistant's role, the rules for monitor and SSH-command
# suggestions, and the JSON response schema that ClaudeAgent._parse_response
# reads back ("analysis", "monitors", "additional_commands", "questions").
# The per-monitor "keyword" field is part of the schema because the parser
# reads it and keyword monitors need it; keep the schema here in sync with
# the fields of MonitorSuggestion and CommandRequest.
SYSTEM_PROMPT = """You are an intelligent monitoring configuration assistant for Uptime Kuma. Your role is to analyze system information from hosts and recommend what should be monitored.

## Your Capabilities
1. Analyze host scan results (OS info, running services, Docker containers, open ports)
2. Suggest monitors to create in Uptime Kuma
3. Request additional SSH commands to gather more information when needed
4. Explain your monitoring recommendations

## Rules for Suggestions
1. **Always explain WHY** you want to monitor something - what failure would it detect?
2. **Be specific** with monitor configurations (ports, paths, intervals)
3. **Prioritize critical services** - databases, web servers, auth services come first
4. **Suggest appropriate intervals** based on criticality:
   - Critical services (databases, auth): 30-60 seconds
   - Web services: 60-120 seconds
   - Background jobs: 300 seconds
5. **Look for health endpoints** - prefer /health, /healthz, /status over root paths
6. **Consider dependencies** - if a service depends on another, both should be monitored

## Rules for SSH Commands
When you need more information, you can request SSH commands. Follow these rules:
1. **Read-only only** - never suggest commands that modify the system
2. **Be specific** - explain exactly what information you need and why
3. **Safe commands only** - no sudo unless absolutely necessary for reading
4. **Examples of acceptable commands:**
   - `curl -s localhost:8080/health` - check if a service responds
   - `cat /etc/nginx/nginx.conf` - read configuration
   - `docker inspect <container>` - get container details
   - `systemctl status <service>` - check service status

## Response Format
Always respond with valid JSON in this structure:
{
    "analysis": "Your analysis of what you found on the host",
    "monitors": [
        {
            "type": "http|tcp|ping|docker|keyword|push",
            "name": "Human-readable monitor name",
            "target": "URL, hostname, container name, or metric type for push",
            "port": 80,
            "interval": 60,
            "reason": "Why this should be monitored",
            "keyword": "Text to look for in the response (only for keyword type)",
            "push_metric": "heartbeat|disk|memory|cpu|updates (only for push type)"
        }
    ],
    "additional_commands": [
        {
            "command": "the SSH command to run",
            "reason": "why you need this information"
        }
    ],
    "questions": ["Any questions for the user about what to monitor"]
}

## Monitor Types
- **http**: Web endpoints (provide full URL with protocol)
- **tcp**: Port connectivity (provide hostname and port)
- **ping**: Host availability (provide hostname)
- **docker**: Docker container status (provide container name)
- **keyword**: Check for specific text in response (provide URL and keyword)
- **push**: Push-based monitors for system metrics (scripts on host push to Uptime Kuma)

## Push Monitors for System Metrics
Always suggest push monitors for system health metrics. These run as cron jobs on the host and push status to Uptime Kuma. Suggest these based on what you see:

1. **Heartbeat** - Simple "I'm alive" check for hosts that can't be reached directly (behind NAT/firewall)
   - Name: "{hostname} - Heartbeat"
   - push_metric: "heartbeat"
   - Use this INSTEAD of ping for remote hosts that Uptime Kuma cannot reach directly

2. **Disk Space** - Alert when any partition exceeds 90% usage
   - Name: "{hostname} - Disk Space"
   - push_metric: "disk"

3. **Memory Usage** - Alert when RAM exceeds 90% usage
   - Name: "{hostname} - Memory"
   - push_metric: "memory"

4. **CPU Load** - Alert when 5-min load average exceeds 95% of CPU cores
   - Name: "{hostname} - CPU Load"
   - push_metric: "cpu"

5. **System Updates** - Alert when security updates are pending (Debian/Ubuntu/RHEL)
   - Name: "{hostname} - Updates"
   - push_metric: "updates"

For push monitors, set:
- type: "push"
- target: the metric type (heartbeat, disk, memory, cpu, updates)
- interval: 300 (5 minutes is typical for system metrics)

Be thorough but not excessive. Quality over quantity - suggest monitors that will actually catch real problems."""


@dataclass
class MonitorSuggestion:
    """A suggested monitor configuration.

    Built by ClaudeAgent._parse_response from one entry of the "monitors"
    array in the model's JSON reply.
    """

    # Monitor type; the system prompt lists http, tcp, ping, docker, keyword, push.
    type: str
    # Human-readable monitor name.
    name: str
    # URL, hostname, container name, or (for push monitors) the metric type.
    target: str
    # Port number, when the reply supplies one.
    port: Optional[int] = None
    # Check interval in seconds.
    interval: int = 60
    # Why this should be monitored.
    reason: str = ""
    # Text to look for in the response; only relevant to keyword monitors.
    keyword: Optional[str] = None
    # heartbeat, disk, memory, cpu or updates; only relevant to push monitors.
    push_metric: Optional[str] = None


@dataclass
class CommandRequest:
    """A request to run an SSH command.

    Built by ClaudeAgent._parse_response from one entry of the
    "additional_commands" array in the model's JSON reply.
    """

    # The SSH command the model wants executed on the host.
    command: str
    # The model's explanation of why it needs the command's output.
    reason: str


@dataclass
class AgentResponse:
    """Response from the Claude agent.

    Parsed form of one model reply, as produced by
    ClaudeAgent._parse_response.
    """

    # The model's analysis text; holds the full raw reply when no JSON could be parsed.
    analysis: str
    # Monitors the model suggests creating.
    monitors: list[MonitorSuggestion]
    # Further SSH commands the model asks to have run.
    additional_commands: list[CommandRequest]
    # Questions the model has for the user.
    questions: list[str]
    # The unmodified text of the model's reply.
    raw_response: str


class ClaudeAgent:
    """Claude AI agent for intelligent monitoring suggestions.

    Keeps a running conversation with the model so that follow-up messages
    (command output, user answers) are interpreted in the context of the
    earlier host analysis.

    Attributes:
        MODEL: Model identifier passed to the Messages API.
        MAX_TOKENS: Upper bound on tokens generated per reply.
        client: Anthropic client built from the configured API key.
        conversation_history: User/assistant messages exchanged so far, in
            the order they were sent.
    """

    MODEL = "claude-sonnet-4-20250514"
    MAX_TOKENS = 4096

    def __init__(self):
        """Create the API client from application config with an empty history."""
        config = get_config()
        self.client = Anthropic(api_key=config.claude_api_key)
        self.conversation_history: list[dict] = []

    def analyze_host(self, scan_results: dict, hostname: str) -> AgentResponse:
        """Analyze host scan results and suggest monitors.

        Args:
            scan_results: Scan output keyed by section (``system_info``,
                ``os_release``, ``docker_containers``, ``systemd_services``,
                ``disk_usage``, ``memory_usage``, ``cpu_count``,
                ``open_ports``). Missing keys are rendered as a placeholder.
            hostname: Name of the scanned host, quoted in the prompt.

        Returns:
            The parsed model reply.
        """
        user_message = f"""I've scanned the host '{hostname}' and gathered the following information:

## System Information
```
{scan_results.get('system_info', 'Not available')}
```

## OS Release
```
{scan_results.get('os_release', 'Not available')}
```

## Running Docker Containers
```
{scan_results.get('docker_containers', 'No Docker or no containers running')}
```

## Running Systemd Services
```
{scan_results.get('systemd_services', 'Not available')}
```

## Disk Usage
```
{scan_results.get('disk_usage', 'Not available')}
```

## Memory Usage
```
{scan_results.get('memory_usage', 'Not available')}
```

## CPU Info
```
{scan_results.get('cpu_count', 'Not available')} CPU cores
```

## Open Ports (Listening)
```
{scan_results.get('open_ports', 'Not available')}
```

Please analyze this information and suggest what should be monitored in Uptime Kuma.
Respond with JSON as specified in your instructions."""

        return self._send_message(user_message)

    def process_command_results(self, command: str, result: str) -> AgentResponse:
        """Process the results of an additional SSH command.

        Args:
            command: The command that was run.
            result: The command's output.

        Returns:
            The parsed model reply with updated suggestions.
        """
        user_message = f"""Here are the results of the command you requested:

Command: `{command}`

Output:
```
{result}
```

Please update your analysis and suggestions based on this new information.
Respond with JSON as specified in your instructions."""

        return self._send_message(user_message)

    def answer_question(self, question: str, answer: str) -> AgentResponse:
        """Process user's answer to a question.

        Args:
            question: The question the model asked.
            answer: The user's reply to it.

        Returns:
            The parsed model reply with updated recommendations.
        """
        user_message = f"""You asked: "{question}"

The user responded: "{answer}"

Please update your recommendations based on this information.
Respond with JSON as specified in your instructions."""

        return self._send_message(user_message)

    def _send_message(self, user_message: str) -> AgentResponse:
        """Send a message to Claude and parse the response.

        The user message and the assistant reply are both appended to
        ``conversation_history``. If the request or the extraction of the
        reply text fails, the user message is removed again and the original
        exception propagates unchanged.

        Args:
            user_message: Text of the next user turn.

        Returns:
            The parsed model reply.
        """
        self.conversation_history.append({"role": "user", "content": user_message})

        try:
            response = self.client.messages.create(
                model=self.MODEL,
                max_tokens=self.MAX_TOKENS,
                system=SYSTEM_PROMPT,
                messages=self.conversation_history,
            )
            assistant_message = response.content[0].text
        except Exception:
            # Drop the unanswered user turn so the history stays in sync and
            # a retry does not resend this message as a duplicate turn.
            self.conversation_history.pop()
            raise

        self.conversation_history.append({"role": "assistant", "content": assistant_message})

        return self._parse_response(assistant_message)

    @staticmethod
    def _extract_json_object(text: str) -> Optional[dict]:
        """Return the JSON object embedded in *text*, or None if there is none."""
        start = text.find("{")
        end = text.rfind("}") + 1
        if start == -1 or end <= start:
            return None

        try:
            data = json.loads(text[start:end])
        except json.JSONDecodeError:
            # Prose after the JSON may itself contain "}", which makes the
            # first-"{"-to-last-"}" slice invalid; decode only the first object.
            try:
                data, _ = json.JSONDecoder().raw_decode(text, start)
            except json.JSONDecodeError:
                return None

        return data if isinstance(data, dict) else None

    @staticmethod
    def _dict_entries(value: object) -> list[dict]:
        """Return the dict items of *value*, or [] when it is not a list."""
        if not isinstance(value, list):
            return []
        return [entry for entry in value if isinstance(entry, dict)]

    @staticmethod
    def _plain_text_response(response_text: str) -> AgentResponse:
        """Wrap a reply that carries no usable JSON as an analysis-only response."""
        return AgentResponse(
            analysis=response_text,
            monitors=[],
            additional_commands=[],
            questions=[],
            raw_response=response_text,
        )

    def _parse_response(self, response_text: str) -> AgentResponse:
        """Parse Claude's JSON response.

        Malformed parts of the reply are tolerated rather than raised: a
        missing, null or non-list collection is treated as empty, entries
        that are not JSON objects are skipped, and a null ``interval`` falls
        back to 60.

        Args:
            response_text: Raw text of the model's reply.

        Returns:
            The parsed response. When no JSON object can be decoded, the
            whole reply is returned as ``analysis`` with empty collections.
        """
        data = self._extract_json_object(response_text)
        if data is None:
            return self._plain_text_response(response_text)

        monitors = [
            MonitorSuggestion(
                type=m.get("type", "http"),
                name=m.get("name", "Unknown"),
                target=m.get("target", ""),
                port=m.get("port"),
                interval=60 if m.get("interval") is None else m["interval"],
                reason=m.get("reason", ""),
                keyword=m.get("keyword"),
                push_metric=m.get("push_metric"),
            )
            for m in self._dict_entries(data.get("monitors"))
        ]

        commands = [
            CommandRequest(
                command=c.get("command", ""),
                reason=c.get("reason", ""),
            )
            for c in self._dict_entries(data.get("additional_commands"))
        ]

        questions = data.get("questions")
        if not isinstance(questions, list):
            questions = []

        return AgentResponse(
            # A JSON null analysis becomes "" so the field stays a string.
            analysis=data.get("analysis") or "",
            monitors=monitors,
            additional_commands=commands,
            questions=questions,
            raw_response=response_text,
        )

    def reset_conversation(self) -> None:
        """Reset the conversation history."""
        self.conversation_history = []


def create_agent() -> ClaudeAgent:
    """Build and return a fresh ClaudeAgent with no conversation history."""
    agent = ClaudeAgent()
    return agent