Initial commit: ComfyUI RunPod Serverless endpoint
- Dockerfile with CUDA 12.8.1, Python 3.12, PyTorch 2.8.0+cu128
- SageAttention 2.2 compiled from source
- Nunchaku wheel installation
- 12 custom nodes pre-installed
- Handler with image/video output support
- Model symlinks to /userdata network volume

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
Dockerfile (new file, 127 lines)
@@ -0,0 +1,127 @@
# ComfyUI RunPod Serverless - CUDA 12.8.1, Python 3.12, PyTorch 2.8.0
FROM nvidia/cuda:12.8.1-devel-ubuntu22.04

ENV DEBIAN_FRONTEND=noninteractive
ENV PYTHONUNBUFFERED=1
ENV PIP_NO_CACHE_DIR=1

# CUDA environment
ENV LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH
ENV LIBRARY_PATH=/usr/local/cuda/lib64/stubs:$LIBRARY_PATH
ENV PATH=/usr/local/cuda/bin:$PATH

# HuggingFace cache paths (will be symlinked to network volume)
ENV HF_HOME=/workspace/.cache/huggingface
ENV HF_HUB_ENABLE_HF_TRANSFER=1
ENV TRANSFORMERS_CACHE=/workspace/.cache/huggingface/transformers
# Python 3.12 is not in Ubuntu 22.04's default repositories; add the deadsnakes PPA
RUN apt-get update && apt-get install -y software-properties-common && \
    add-apt-repository -y ppa:deadsnakes/ppa

# Install system dependencies
RUN apt-get update && apt-get install -y \
    python3.12 \
    python3.12-dev \
    python3.12-venv \
    git \
    git-lfs \
    wget \
    curl \
    ffmpeg \
    libgl1-mesa-glx \
    libglib2.0-0 \
    libsm6 \
    libxext6 \
    libxrender-dev \
    libgomp1 \
    build-essential \
    ninja-build \
    && rm -rf /var/lib/apt/lists/*

# Set Python 3.12 as default
RUN update-alternatives --install /usr/bin/python python /usr/bin/python3.12 1 && \
    update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.12 1

# Bootstrap pip for Python 3.12 (apt's python3-pip targets the distro's 3.10),
# then upgrade build tooling
RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.12 && \
    python -m pip install --upgrade pip setuptools wheel
# Install PyTorch 2.8.0+cu128 and triton 3.4.0
RUN pip install \
    torch==2.8.0+cu128 \
    torchvision==0.23.0+cu128 \
    torchaudio==2.8.0+cu128 \
    --index-url https://download.pytorch.org/whl/cu128

RUN pip install triton==3.4.0

# Install nunchaku from GitHub wheel
RUN pip install https://github.com/nunchaku-tech/nunchaku/releases/download/v1.0.2/nunchaku-1.0.2+torch2.8-cp312-cp312-linux_x86_64.whl

# Install key dependencies before SageAttention
COPY requirements.txt /tmp/requirements.txt
RUN pip install -r /tmp/requirements.txt

# Compile SageAttention 2.2 from source with no build isolation
WORKDIR /tmp
ENV EXT_PARALLEL=4
ENV NVCC_APPEND_FLAGS="--threads 8"
ENV MAX_JOBS=32
RUN git clone https://github.com/thu-ml/SageAttention.git && \
    cd SageAttention && \
    pip install --no-build-isolation -e .

# Clone ComfyUI
WORKDIR /workspace
RUN git clone https://github.com/comfyanonymous/ComfyUI.git && \
    cd ComfyUI && \
    pip install -r requirements.txt

# Install custom nodes
WORKDIR /workspace/ComfyUI/custom_nodes
RUN git clone https://github.com/ltdrdata/ComfyUI-Manager.git && \
    git clone https://github.com/jnxmx/ComfyUI_HuggingFace_Downloader.git && \
    git clone https://github.com/kijai/ComfyUI-KJNodes.git && \
    git clone https://github.com/Fannovel16/comfyui_controlnet_aux.git && \
    git clone https://github.com/crystian/ComfyUI-Crystools.git && \
    git clone https://github.com/Kosinkadink/ComfyUI-VideoHelperSuite.git && \
    git clone https://github.com/willmiao/ComfyUI-Lora-Manager.git && \
    git clone https://github.com/city96/ComfyUI-GGUF.git && \
    git clone https://github.com/Fannovel16/ComfyUI-Frame-Interpolation.git && \
    git clone https://github.com/nunchaku-tech/ComfyUI-nunchaku.git && \
    git clone https://github.com/evanspearman/ComfyMath.git && \
    git clone https://github.com/ssitu/ComfyUI_UltimateSDUpscale.git

# Install custom node dependencies (best-effort: a failing optional
# dependency should not break the build)
RUN cd ComfyUI-KJNodes && pip install -r requirements.txt || true
RUN cd comfyui_controlnet_aux && pip install -r requirements.txt || true
RUN cd ComfyUI-VideoHelperSuite && pip install -r requirements.txt || true
RUN cd ComfyUI-GGUF && pip install -r requirements.txt || true
RUN cd ComfyUI-Frame-Interpolation && pip install -r requirements.txt || true
RUN cd ComfyUI-nunchaku && pip install -r requirements.txt || true

# Create directories and symlinks to network volume
WORKDIR /workspace/ComfyUI
RUN mkdir -p /userdata/models/checkpoints \
    /userdata/models/loras \
    /userdata/models/vae \
    /userdata/models/controlnet \
    /userdata/models/clip \
    /userdata/models/upscale_models \
    /userdata/.cache/huggingface \
    /workspace/.cache

# Symlink model directories to /userdata
RUN rm -rf models/checkpoints && ln -s /userdata/models/checkpoints models/checkpoints && \
    rm -rf models/loras && ln -s /userdata/models/loras models/loras && \
    rm -rf models/vae && ln -s /userdata/models/vae models/vae && \
    rm -rf models/controlnet && ln -s /userdata/models/controlnet models/controlnet && \
    rm -rf models/clip && ln -s /userdata/models/clip models/clip && \
    rm -rf models/upscale_models && ln -s /userdata/models/upscale_models models/upscale_models

# Symlink HuggingFace cache
RUN ln -s /userdata/.cache/huggingface /workspace/.cache/huggingface

# Copy handler
WORKDIR /workspace
COPY handler.py /workspace/handler.py

# RunPod handler entrypoint
CMD ["python", "-u", "handler.py"]
PROJECT.md (new file, 163 lines)
@@ -0,0 +1,163 @@
# ComfyUI RunPod Serverless Project

## Project Overview
Build a RunPod Serverless endpoint running ComfyUI with SageAttention 2.2 for image/video generation. A self-hosted frontend will call the RunPod API.

## Architecture
- **RunPod Serverless**: Hosts the ComfyUI worker with GPU inference
- **Network Volume**: Mounts at `/userdata`
- **Gitea Registry**: Hosts the Docker image
- **Frontend**: Self-hosted on a home server, calls the RunPod API over HTTPS

## Reference Environment (extracted from working pod)

### Base System
- Ubuntu 22.04.5 LTS (Jammy)
- Python 3.12.12
- CUDA 12.8 (nvcc 12.8.93)
- cuDNN 9.8.0.87
- NCCL 2.25.1

### PyTorch Stack
- torch==2.8.0+cu128
- torchvision==0.23.0+cu128
- torchaudio==2.8.0+cu128
- triton==3.4.0

### Key Dependencies
- transformers==4.56.2
- diffusers==0.35.2
- accelerate==1.11.0
- safetensors==0.6.2
- onnxruntime-gpu==1.23.2
- opencv-python==4.12.0.88
- mediapipe==0.10.14
- insightface==0.7.3
- spandrel==0.4.1
- kornia==0.8.2
- einops==0.8.1
- timm==1.0.22
- peft==0.17.1
- gguf==0.17.1
- av==16.0.1 (video)
- imageio-ffmpeg==0.6.0

### Nunchaku (prebuilt wheel)
```
nunchaku @ https://github.com/nunchaku-tech/nunchaku/releases/download/v1.0.2/nunchaku-1.0.2+torch2.8-cp312-cp312-linux_x86_64.whl
```

### ComfyUI
- Location: `/workspace/ComfyUI`
- Uses venv at `/workspace/ComfyUI/venv`
- Commit: 532e2850794c7b497174a0a42ac0cb1fe5b62499 (Dec 24, 2025)

### Custom Nodes (from CUSTOM_NODES env var + actual install)
```
ltdrdata/ComfyUI-Manager
jnxmx/ComfyUI_HuggingFace_Downloader
kijai/ComfyUI-KJNodes
Fannovel16/comfyui_controlnet_aux
crystian/ComfyUI-Crystools
Kosinkadink/ComfyUI-VideoHelperSuite
willmiao/ComfyUI-Lora-Manager
city96/ComfyUI-GGUF
Fannovel16/ComfyUI-Frame-Interpolation
nunchaku-tech/ComfyUI-nunchaku
evanspearman/ComfyMath
ssitu/ComfyUI_UltimateSDUpscale
```

### Environment Variables (relevant)
```bash
HF_HOME=/workspace/.cache/huggingface
HF_HUB_ENABLE_HF_TRANSFER=1
TRANSFORMERS_CACHE=/workspace/.cache/huggingface/transformers
PYTHONUNBUFFERED=1
LD_LIBRARY_PATH=/usr/local/cuda/lib64
LIBRARY_PATH=/usr/local/cuda/lib64/stubs
```

### Network Volume Mount
- Mount point: `/userdata`

## Technical Requirements

### SageAttention 2.2 (Critical)
Must be compiled from source with build isolation disabled:
```bash
git clone https://github.com/thu-ml/SageAttention.git
cd SageAttention
pip install triton
export EXT_PARALLEL=4 NVCC_APPEND_FLAGS="--threads 8" MAX_JOBS=32
pip install --no-build-isolation -e .
```
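
After the build, a quick smoke test confirms the compiled kernels load (a minimal sketch, assuming a CUDA GPU is visible; the `sageattn` entry point and the (batch, heads, seq, head_dim) "HND" layout follow the project's README):

```python
# Smoke test for the compiled SageAttention package.
import torch
from sageattention import sageattn

q = torch.randn(1, 8, 1024, 64, dtype=torch.float16, device="cuda")
k = torch.randn(1, 8, 1024, 64, dtype=torch.float16, device="cuda")
v = torch.randn(1, 8, 1024, 64, dtype=torch.float16, device="cuda")

out = sageattn(q, k, v, tensor_layout="HND", is_causal=False)
print(out.shape)  # expect torch.Size([1, 8, 1024, 64])
```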

### Network Volume Structure
```
/userdata/
├── models/
│   ├── checkpoints/
│   ├── loras/
│   ├── vae/
│   ├── controlnet/
│   ├── clip/
│   └── upscale_models/
└── .cache/
    └── huggingface/
```

### Handler Requirements
- Accept JSON input: `{"image": "base64", "prompt": "string", "workflow": {}}`
- Upload the image to ComfyUI if provided
- Inject the prompt into the workflow at the specified node
- Queue the workflow, poll for completion
- Return output as base64:
  - Images: PNG/JPEG base64
  - Videos: MP4 base64 (or presigned URL if >10MB)
- Detect output type from the workflow output node (see the sketch below)
- Timeout handling (max 600s for video generation)
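
For instance, output type can be inferred from the produced file's extension (one simple heuristic; this mirrors what handler.py does):

```python
# Classify a ComfyUI output file as image or video by extension.
from pathlib import Path

VIDEO_EXTS = {".mp4", ".webm", ".gif", ".mov"}

def output_type(filename: str) -> str:
    return "video" if Path(filename).suffix.lower() in VIDEO_EXTS else "image"
```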

### Dockerfile Requirements
- Base: `nvidia/cuda:12.8.1-devel-ubuntu22.04` (or equivalent with CUDA 12.8 devel)
- Python 3.12
- PyTorch 2.8.0+cu128 from the PyTorch index
- Install nunchaku from the GitHub wheel
- Compile SageAttention with `--no-build-isolation`
- Symlink model directories to `/userdata`
- Clone and install all custom nodes
- Install ffmpeg for video handling
- Expose the handler as the entrypoint

## File Structure
```
/project
├── Dockerfile
├── handler.py
├── requirements.txt
├── scripts/
│   └── install_custom_nodes.sh
├── workflows/
│   └── default_workflow_api.json
└── README.md
```

## Tasks
1. Create the Dockerfile matching the reference environment (CUDA 12.8, Python 3.12, PyTorch 2.8)
2. Create requirements.txt from the extracted pip freeze (pruned to essentials)
3. Create install_custom_nodes.sh for all listed custom nodes
4. Create handler.py with ComfyUI API integration (image + video output support)
5. Document deployment steps in README.md

## Notes
- Nick is a Principal Systems Engineer; prefers direct technical communication
- Target deployment: RunPod Serverless with a 5090 GPU
- Development machine: RTX 3080 (forward compatible)
- Registry: self-hosted Gitea
- Output will likely be video; ensure ffmpeg is installed and the handler detects the output type
- The reference pod uses a venv; the serverless image can install packages globally

## Claude Code Init Command
```
Read PROJECT.md fully. Build the Dockerfile first, matching the reference environment exactly: CUDA 12.8.1, Python 3.12, PyTorch 2.8.0+cu128, triton 3.4.0. Install nunchaku from the GitHub wheel URL. Compile SageAttention 2.2 with --no-build-isolation. Install all custom nodes listed. Symlink model paths to /userdata. Do not use a venv in the container.
```
README.md (new file, 279 lines)
@@ -0,0 +1,279 @@
# ComfyUI RunPod Serverless

RunPod Serverless endpoint for ComfyUI with SageAttention 2.2, supporting image and video generation workflows.

## Stack

- CUDA 12.8.1 / Ubuntu 22.04
- Python 3.12
- PyTorch 2.8.0+cu128
- SageAttention 2.2 (compiled)
- Nunchaku 1.0.2
- 12 custom nodes pre-installed

## Prerequisites

- Docker with NVIDIA runtime
- RunPod account with API key
- Network volume created in RunPod
- Container registry (Docker Hub, Gitea, etc.)

## Build

```bash
docker build -t comfyui-runpod:latest .
```

Build for a specific platform (if building on an ARM host):

```bash
docker build --platform linux/amd64 -t comfyui-runpod:latest .
```

## Push to Registry

Docker Hub:

```bash
docker tag comfyui-runpod:latest yourusername/comfyui-runpod:latest
docker push yourusername/comfyui-runpod:latest
```

Self-hosted Gitea:

```bash
docker tag comfyui-runpod:latest git.yourdomain.com/username/comfyui-runpod:latest
docker push git.yourdomain.com/username/comfyui-runpod:latest
```

## Network Volume Setup

Create a network volume in RunPod and populate it with models:

```
/userdata/
├── models/
│   ├── checkpoints/     # SD, SDXL, Flux models
│   ├── loras/           # LoRA models
│   ├── vae/             # VAE models
│   ├── controlnet/      # ControlNet models
│   ├── clip/            # CLIP models
│   └── upscale_models/  # Upscaler models
└── .cache/
    └── huggingface/     # HF model cache
```

Upload models to the network volume (from a temporary RunPod pod, or with rclone) before deploying the serverless endpoint.
## RunPod Deployment

1. Go to RunPod Console > Serverless > New Endpoint

2. Configure the endpoint:
   - **Container Image**: `yourusername/comfyui-runpod:latest`
   - **GPU**: RTX 4090, RTX 5090, or A100 recommended
   - **Network Volume**: Select your volume (mounts at `/userdata`)
   - **Active Workers**: 0 (scale to zero)
   - **Max Workers**: Based on budget
   - **Idle Timeout**: 5-10 seconds
   - **Execution Timeout**: 600 seconds (for video)

3. Deploy and note the Endpoint ID

## API Usage

### Endpoint URL

```
https://api.runpod.ai/v2/{ENDPOINT_ID}/runsync
```

### Headers

```
Authorization: Bearer {RUNPOD_API_KEY}
Content-Type: application/json
```

### Request Schema

```json
{
  "input": {
    "workflow": {},
    "prompt": "optional prompt text",
    "image": "optional base64 image",
    "prompt_node_id": "optional node id for prompt",
    "image_node_id": "optional node id for image",
    "timeout": 300
  }
}
```
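
The equivalent request from Python (a sketch; assumes `ENDPOINT_ID` and `RUNPOD_API_KEY` are set in the environment, and `workflow_api.json` is an API-format export as described under Workflow Export below):

```python
# Build and send a /runsync request: the workflow JSON is embedded in the
# payload; an optional init image would be sent as base64 (commented out).
import base64
import json
import os
import requests

url = f"https://api.runpod.ai/v2/{os.environ['ENDPOINT_ID']}/runsync"
headers = {"Authorization": f"Bearer {os.environ['RUNPOD_API_KEY']}"}

with open("workflow_api.json") as f:
    workflow = json.load(f)

payload = {
    "input": {
        "workflow": workflow,
        "prompt": "a photo of a cat in space",
        # "image": base64.b64encode(open("input.png", "rb").read()).decode(),
        "timeout": 300,
    }
}

print(requests.post(url, json=payload, headers=headers).json())
```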

### Example: Text-to-Image

```bash
curl -X POST "https://api.runpod.ai/v2/${ENDPOINT_ID}/runsync" \
  -H "Authorization: Bearer ${RUNPOD_API_KEY}" \
  -H "Content-Type: application/json" \
  -d '{
    "input": {
      "workflow": '"$(cat workflow_api.json)"',
      "prompt": "a photo of a cat in space"
    }
  }'
```

### Example: Image-to-Video

```bash
curl -X POST "https://api.runpod.ai/v2/${ENDPOINT_ID}/runsync" \
  -H "Authorization: Bearer ${RUNPOD_API_KEY}" \
  -H "Content-Type: application/json" \
  -d '{
    "input": {
      "workflow": '"$(cat i2v_workflow_api.json)"',
      "image": "'"$(base64 -w0 input.png)"'",
      "prompt": "the cat walks forward",
      "timeout": 600
    }
  }'
```

### Response Schema

Success:

```json
{
  "id": "job-id",
  "status": "COMPLETED",
  "output": {
    "status": "success",
    "prompt_id": "abc123",
    "outputs": [
      {
        "type": "video",
        "filename": "output.mp4",
        "data": "base64...",
        "size": 1234567
      }
    ]
  }
}
```

Large files (videos over 10MB are written to the network volume and returned by path instead of inline base64):

```json
{
  "outputs": [
    {
      "type": "video",
      "filename": "output.mp4",
      "path": "/userdata/outputs/output.mp4",
      "size": 52428800
    }
  ]
}
```

Error:

```json
{
  "output": {
    "error": "error message",
    "status": "error"
  }
}
```
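
A client-side sketch for saving the returned outputs (assumes `body` holds the parsed `/runsync` response; the helper name is illustrative):

```python
# Save outputs from a /runsync response: inline base64 payloads are decoded
# to disk; oversized videos are referenced by their network-volume path.
import base64
from pathlib import Path

def save_outputs(body: dict, dest: str = ".") -> None:
    for out in body["output"]["outputs"]:
        if "data" in out:  # inline base64 payload
            Path(dest, out["filename"]).write_bytes(base64.b64decode(out["data"]))
        else:              # large file left on the network volume
            print(f"{out['filename']} is on the volume at {out['path']}")
```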

## Async Execution

For long-running video jobs, use the async endpoint:

```bash
# Submit job
curl -X POST "https://api.runpod.ai/v2/${ENDPOINT_ID}/run" \
  -H "Authorization: Bearer ${RUNPOD_API_KEY}" \
  -H "Content-Type: application/json" \
  -d '{"input": {...}}'

# Response: {"id": "job-id", "status": "IN_QUEUE"}

# Poll for result
curl "https://api.runpod.ai/v2/${ENDPOINT_ID}/status/${JOB_ID}" \
  -H "Authorization: Bearer ${RUNPOD_API_KEY}"
```
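
The same submit-and-poll flow from Python (a sketch; assumes `ENDPOINT_ID` and `RUNPOD_API_KEY` in the environment, and RunPod's standard terminal job states):

```python
# Submit a job to the async /run endpoint and poll /status until it
# reaches a terminal state.
import os
import time
import requests

BASE = f"https://api.runpod.ai/v2/{os.environ['ENDPOINT_ID']}"
HEADERS = {"Authorization": f"Bearer {os.environ['RUNPOD_API_KEY']}"}

def run_and_wait(payload: dict, poll_every: float = 2.0) -> dict:
    job = requests.post(f"{BASE}/run", json={"input": payload}, headers=HEADERS).json()
    while True:
        status = requests.get(f"{BASE}/status/{job['id']}", headers=HEADERS).json()
        if status["status"] in ("COMPLETED", "FAILED", "CANCELLED", "TIMED_OUT"):
            return status
        time.sleep(poll_every)
```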

## Workflow Export

Export workflows from ComfyUI in API format:

1. Open ComfyUI
2. Enable Dev Mode in the settings
3. Click "Save (API Format)"
4. Use the exported JSON as the `workflow` parameter (see the sketch below)
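
An API-format export maps node IDs to `class_type`/`inputs` objects, so a client can load it and override fields directly before submitting (a sketch; node ID `"6"` and its fields are illustrative, not fixed):

```python
# Peek at an API-format export and override one input before submission.
import json

with open("workflow_api.json") as f:
    workflow = json.load(f)

# Each entry looks roughly like:
#   {"6": {"class_type": "CLIPTextEncode",
#          "inputs": {"text": "...", "clip": ["4", 1]}}}
workflow["6"]["inputs"]["text"] = "a photo of a cat in space"
```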

## Custom Nodes Included

- ComfyUI-Manager
- ComfyUI_HuggingFace_Downloader
- ComfyUI-KJNodes
- comfyui_controlnet_aux
- ComfyUI-Crystools
- ComfyUI-VideoHelperSuite
- ComfyUI-Lora-Manager
- ComfyUI-GGUF
- ComfyUI-Frame-Interpolation
- ComfyUI-nunchaku
- ComfyMath
- ComfyUI_UltimateSDUpscale

## Troubleshooting

### Cold Start Timeout

The first request has to start the ComfyUI server (~30-60s). Increase the idle timeout or keep active (warm) workers.

### Out of Memory

Reduce batch size or resolution in the workflow. Use GGUF-quantized variants for large models.

### Model Not Found

Ensure models are uploaded to the correct `/userdata/models/` subdirectory matching ComfyUI's folder structure.

### Video Generation Timeout

The default maximum is 600s. For longer videos, split generation into segments, or reduce resolution and frame count.

### Connection Refused

The ComfyUI server may have crashed. Check the logs in the RunPod console and make sure the workflow is valid.

## Local Testing

Note that port 8188 is ComfyUI's internal port; the handler itself only serves HTTP when started with the RunPod SDK's local test API:

```bash
# Build
docker build -t comfyui-runpod:latest .

# Run with GPU; --rp_serve_api tells the RunPod SDK to serve a local test
# API (default port 8000) instead of waiting for RunPod jobs
docker run --gpus all -p 8000:8000 \
  -v /path/to/models:/userdata/models \
  comfyui-runpod:latest \
  python -u handler.py --rp_serve_api --rp_api_host 0.0.0.0

# Test handler
curl -X POST http://localhost:8000/runsync \
  -H "Content-Type: application/json" \
  -d '{"input": {"workflow": {...}}}'
```

## Environment Variables

| Variable | Default | Description |
|----------|---------|-------------|
| `HF_HOME` | `/workspace/.cache/huggingface` | HuggingFace cache |
| `HF_HUB_ENABLE_HF_TRANSFER` | `1` | Fast HF downloads via hf-transfer |
| `PYTHONUNBUFFERED` | `1` | Real-time logs |
handler.py (new file, 371 lines)
@@ -0,0 +1,371 @@
"""
|
||||||
|
ComfyUI RunPod Serverless Handler
|
||||||
|
Handles image/video generation workflows with ComfyUI API
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import json
|
||||||
|
import time
|
||||||
|
import base64
|
||||||
|
import uuid
|
||||||
|
import subprocess
|
||||||
|
import signal
|
||||||
|
import requests
|
||||||
|
from pathlib import Path
|
||||||
|
from urllib.parse import urljoin
|
||||||
|
import runpod
|
||||||
|
|
||||||
|
# Configuration
|
||||||
|
COMFYUI_DIR = "/workspace/ComfyUI"
|
||||||
|
COMFYUI_PORT = 8188
|
||||||
|
COMFYUI_HOST = f"http://127.0.0.1:{COMFYUI_PORT}"
|
||||||
|
MAX_TIMEOUT = 600 # 10 minutes max for video generation
|
||||||
|
POLL_INTERVAL = 1.0
|
||||||
|
STARTUP_TIMEOUT = 120
|
||||||
|
|
||||||
|
# Global ComfyUI process
|
||||||
|
comfyui_process = None
|
||||||
|


def start_comfyui():
    """Start the ComfyUI server if it is not already running."""
    global comfyui_process

    if comfyui_process is not None and comfyui_process.poll() is None:
        return True

    print("Starting ComfyUI server...")

    comfyui_process = subprocess.Popen(
        [
            sys.executable, "main.py",
            "--listen", "127.0.0.1",
            "--port", str(COMFYUI_PORT),
            "--disable-auto-launch"
        ],
        cwd=COMFYUI_DIR,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        # Run in its own process group so stop_comfyui() can signal the tree
        preexec_fn=os.setsid if hasattr(os, 'setsid') else None
    )

    # Wait for the server to be ready
    start_time = time.time()
    while time.time() - start_time < STARTUP_TIMEOUT:
        try:
            resp = requests.get(f"{COMFYUI_HOST}/system_stats", timeout=2)
            if resp.status_code == 200:
                print("ComfyUI server ready")
                return True
        except requests.exceptions.RequestException:
            pass
        time.sleep(1)

    print("ComfyUI server failed to start")
    return False


def stop_comfyui():
    """Stop the ComfyUI server."""
    global comfyui_process

    if comfyui_process is not None:
        try:
            os.killpg(os.getpgid(comfyui_process.pid), signal.SIGTERM)
        except (OSError, ProcessLookupError):
            comfyui_process.terminate()
        comfyui_process = None


def upload_image(image_base64: str, filename: str = None) -> str:
    """Upload a base64 image to ComfyUI and return the stored filename."""
    if filename is None:
        filename = f"input_{uuid.uuid4().hex[:8]}.png"

    # Decode base64
    image_data = base64.b64decode(image_base64)

    # Upload to ComfyUI
    files = {
        "image": (filename, image_data, "image/png"),
    }
    data = {
        "overwrite": "true"
    }

    resp = requests.post(
        f"{COMFYUI_HOST}/upload/image",
        files=files,
        data=data
    )

    if resp.status_code != 200:
        raise Exception(f"Failed to upload image: {resp.text}")

    result = resp.json()
    return result.get("name", filename)


def inject_prompt_into_workflow(workflow: dict, prompt: str, prompt_node_id: str = None) -> dict:
    """Inject prompt text into the workflow at the specified node, or auto-detect one."""
    # Deep-copy so the nested node "inputs" dicts are not mutated on the
    # caller's workflow (dict.copy() is shallow)
    workflow = copy.deepcopy(workflow)

    # If a specific node ID was provided, use it
    if prompt_node_id and prompt_node_id in workflow:
        node = workflow[prompt_node_id]
        if "inputs" in node:
            # Common prompt input field names
            for field in ["text", "prompt", "positive", "string"]:
                if field in node["inputs"]:
                    node["inputs"][field] = prompt
        return workflow

    # Auto-detect: find nodes that look like text/prompt inputs
    prompt_node_types = [
        "CLIPTextEncode",
        "CLIPTextEncodeSDXL",
        "Text Multiline",
        "String",
        "TextInput"
    ]

    for node_id, node in workflow.items():
        class_type = node.get("class_type", "")
        if class_type in prompt_node_types:
            if "inputs" in node:
                for field in ["text", "prompt", "positive", "string"]:
                    if field in node["inputs"]:
                        # Only inject into positive prompts, skip negative
                        if "negative" not in node.get("_meta", {}).get("title", "").lower():
                            node["inputs"][field] = prompt
                            return workflow

    return workflow


def inject_image_into_workflow(workflow: dict, image_filename: str, image_node_id: str = None) -> dict:
    """Inject the uploaded image filename into the workflow."""
    workflow = copy.deepcopy(workflow)

    # If a specific node ID was provided, use it
    if image_node_id and image_node_id in workflow:
        node = workflow[image_node_id]
        if "inputs" in node:
            node["inputs"]["image"] = image_filename
        return workflow

    # Auto-detect: find LoadImage nodes
    for node_id, node in workflow.items():
        class_type = node.get("class_type", "")
        if class_type in ["LoadImage", "LoadImageFromPath"]:
            if "inputs" in node:
                node["inputs"]["image"] = image_filename
                return workflow

    return workflow


def queue_workflow(workflow: dict, client_id: str = None) -> str:
    """Queue a workflow and return its prompt_id."""
    if client_id is None:
        client_id = uuid.uuid4().hex

    payload = {
        "prompt": workflow,
        "client_id": client_id
    }

    resp = requests.post(
        f"{COMFYUI_HOST}/prompt",
        json=payload
    )

    if resp.status_code != 200:
        raise Exception(f"Failed to queue workflow: {resp.text}")

    result = resp.json()
    return result["prompt_id"]


def get_history(prompt_id: str) -> dict:
    """Get the execution history for a prompt."""
    resp = requests.get(f"{COMFYUI_HOST}/history/{prompt_id}")
    if resp.status_code != 200:
        return {}
    return resp.json()


def poll_for_completion(prompt_id: str, timeout: int = MAX_TIMEOUT) -> dict:
    """Poll until the workflow completes or the timeout expires."""
    start_time = time.time()

    while time.time() - start_time < timeout:
        history = get_history(prompt_id)

        if prompt_id in history:
            status = history[prompt_id].get("status", {})

            # Check for completion
            if status.get("completed", False):
                return history[prompt_id]

            # Check for error
            if status.get("status_str") == "error":
                raise Exception(f"Workflow execution failed: {status}")

        time.sleep(POLL_INTERVAL)

    raise TimeoutError(f"Workflow execution timed out after {timeout}s")


def get_output_files(history: dict) -> list:
    """Extract output file info from a history entry."""
    outputs = []

    if "outputs" not in history:
        return outputs

    for node_id, node_output in history["outputs"].items():
        # Handle image outputs
        if "images" in node_output:
            for img in node_output["images"]:
                outputs.append({
                    "type": "image",
                    "filename": img["filename"],
                    "subfolder": img.get("subfolder", ""),
                    "type_folder": img.get("type", "output")
                })

        # Handle video outputs (VideoHelperSuite and similar report them
        # under the "gifs" key)
        if "gifs" in node_output:
            for vid in node_output["gifs"]:
                outputs.append({
                    "type": "video",
                    "filename": vid["filename"],
                    "subfolder": vid.get("subfolder", ""),
                    "type_folder": vid.get("type", "output")
                })

        # Handle generic files
        if "files" in node_output:
            for f in node_output["files"]:
                filename = f.get("filename", "")
                ext = Path(filename).suffix.lower()
                file_type = "video" if ext in [".mp4", ".webm", ".gif", ".mov"] else "image"
                outputs.append({
                    "type": file_type,
                    "filename": filename,
                    "subfolder": f.get("subfolder", ""),
                    "type_folder": f.get("type", "output")
                })

    return outputs


def fetch_output(output_info: dict) -> bytes:
    """Fetch an output file from ComfyUI."""
    params = {
        "filename": output_info["filename"],
        "subfolder": output_info["subfolder"],
        "type": output_info["type_folder"]
    }

    resp = requests.get(f"{COMFYUI_HOST}/view", params=params)

    if resp.status_code != 200:
        raise Exception(f"Failed to fetch output: {resp.status_code}")

    return resp.content


def handler(job: dict) -> dict:
    """RunPod serverless handler."""
    job_input = job.get("input", {})

    # Validate input
    workflow = job_input.get("workflow")
    if not workflow:
        return {"error": "Missing 'workflow' in input"}

    # Ensure ComfyUI is running
    if not start_comfyui():
        return {"error": "Failed to start ComfyUI server"}

    try:
        # Handle image upload if provided
        if "image" in job_input and job_input["image"]:
            image_filename = upload_image(
                job_input["image"],
                job_input.get("image_filename")
            )
            workflow = inject_image_into_workflow(
                workflow,
                image_filename,
                job_input.get("image_node_id")
            )

        # Handle prompt injection if provided
        if "prompt" in job_input and job_input["prompt"]:
            workflow = inject_prompt_into_workflow(
                workflow,
                job_input["prompt"],
                job_input.get("prompt_node_id")
            )

        # Queue workflow
        client_id = uuid.uuid4().hex
        prompt_id = queue_workflow(workflow, client_id)

        # Poll for completion
        timeout = min(job_input.get("timeout", MAX_TIMEOUT), MAX_TIMEOUT)
        history = poll_for_completion(prompt_id, timeout)

        # Get output files
        outputs = get_output_files(history)

        if not outputs:
            return {"error": "No outputs generated"}

        # Fetch and encode outputs
        results = []
        for output_info in outputs:
            data = fetch_output(output_info)

            # Check size for video files
            if output_info["type"] == "video" and len(data) > 10 * 1024 * 1024:
                # For large videos, save to the network volume and return a path
                output_path = Path("/userdata/outputs") / output_info["filename"]
                output_path.parent.mkdir(parents=True, exist_ok=True)
                output_path.write_bytes(data)
                results.append({
                    "type": output_info["type"],
                    "filename": output_info["filename"],
                    "path": str(output_path),
                    "size": len(data)
                })
            else:
                # Return as base64
                results.append({
                    "type": output_info["type"],
                    "filename": output_info["filename"],
                    "data": base64.b64encode(data).decode("utf-8"),
                    "size": len(data)
                })

        return {
            "status": "success",
            "prompt_id": prompt_id,
            "outputs": results
        }

    except TimeoutError as e:
        return {"error": str(e), "status": "timeout"}
    except Exception as e:
        return {"error": str(e), "status": "error"}


# RunPod serverless entry point
if __name__ == "__main__":
    print("Starting ComfyUI RunPod Handler...")
    runpod.serverless.start({"handler": handler})
requirements.txt (new file, 38 lines)
@@ -0,0 +1,38 @@
# Core ML dependencies
transformers==4.56.2
diffusers==0.35.2
accelerate==1.11.0
safetensors==0.6.2

# ONNX runtime
onnxruntime-gpu==1.23.2

# Image/Video processing
opencv-python==4.12.0.88
mediapipe==0.10.14
insightface==0.7.3
imageio-ffmpeg==0.6.0
av==16.0.1

# ML utilities
spandrel==0.4.1
kornia==0.8.2
einops==0.8.1
timm==1.0.22
peft==0.17.1
gguf==0.17.1

# HuggingFace transfer acceleration
hf-transfer

# RunPod SDK
runpod

# Additional common dependencies
numpy
scipy
pillow
tqdm
requests
aiohttp
websocket-client