Initial commit: ComfyUI RunPod Serverless endpoint

- Dockerfile with CUDA 12.8.1, Python 3.12, PyTorch 2.8.0+cu128 - SageAttention 2.2 compiled from source - Nunchaku wheel installation - 12 custom nodes pre-installed - Handler with image/video output support - Model symlinks to /userdata network volume 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-25 21:59:09 +13:00
commit a5adfe060e
5 changed files with 978 additions and 0 deletions
--- a/127
+++ b/127
@@ -0,0 +1,127 @@
+# ComfyUI RunPod Serverless - CUDA 12.8.1, Python 3.12, PyTorch 2.8.0
+FROM nvidia/cuda:12.8.1-devel-ubuntu22.04
+
+ENV DEBIAN_FRONTEND=noninteractive
+ENV PYTHONUNBUFFERED=1
+ENV PIP_NO_CACHE_DIR=1
+
+# CUDA environment
+ENV LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH
+ENV LIBRARY_PATH=/usr/local/cuda/lib64/stubs:$LIBRARY_PATH
+ENV PATH=/usr/local/cuda/bin:$PATH
+
+# HuggingFace cache paths (will be symlinked to network volume)
+ENV HF_HOME=/workspace/.cache/huggingface
+ENV HF_HUB_ENABLE_HF_TRANSFER=1
+ENV TRANSFORMERS_CACHE=/workspace/.cache/huggingface/transformers
+
+# Install system dependencies
+RUN apt-get update && apt-get install -y \
+    python3.12 \
+    python3.12-dev \
+    python3.12-venv \
+    python3-pip \
+    git \
+    git-lfs \
+    wget \
+    curl \
+    ffmpeg \
+    libgl1-mesa-glx \
+    libglib2.0-0 \
+    libsm6 \
+    libxext6 \
+    libxrender-dev \
+    libgomp1 \
+    build-essential \
+    ninja-build \
+    && rm -rf /var/lib/apt/lists/*
+
+# Set Python 3.12 as default
+RUN update-alternatives --install /usr/bin/python python /usr/bin/python3.12 1 && \
+    update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.12 1
+
+# Upgrade pip
+RUN python -m pip install --upgrade pip setuptools wheel
+
+# Install PyTorch 2.8.0+cu128 and triton 3.4.0
+RUN pip install \
+    torch==2.8.0+cu128 \
+    torchvision==0.23.0+cu128 \
+    torchaudio==2.8.0+cu128 \
+    --index-url https://download.pytorch.org/whl/cu128
+
+RUN pip install triton==3.4.0
+
+# Install nunchaku from GitHub wheel
+RUN pip install https://github.com/nunchaku-tech/nunchaku/releases/download/v1.0.2/nunchaku-1.0.2+torch2.8-cp312-cp312-linux_x86_64.whl
+
+# Install key dependencies before SageAttention
+COPY requirements.txt /tmp/requirements.txt
+RUN pip install -r /tmp/requirements.txt
+
+# Compile SageAttention 2.2 from source with no build isolation
+WORKDIR /tmp
+ENV EXT_PARALLEL=4
+ENV NVCC_APPEND_FLAGS="--threads 8"
+ENV MAX_JOBS=32
+RUN git clone https://github.com/thu-ml/SageAttention.git && \
+    cd SageAttention && \
+    pip install --no-build-isolation -e .
+
+# Clone ComfyUI
+WORKDIR /workspace
+RUN git clone https://github.com/comfyanonymous/ComfyUI.git && \
+    cd ComfyUI && \
+    pip install -r requirements.txt
+
+# Install custom nodes
+WORKDIR /workspace/ComfyUI/custom_nodes
+RUN git clone https://github.com/ltdrdata/ComfyUI-Manager.git && \
+    git clone https://github.com/jnxmx/ComfyUI_HuggingFace_Downloader.git && \
+    git clone https://github.com/kijai/ComfyUI-KJNodes.git && \
+    git clone https://github.com/Fannovel16/comfyui_controlnet_aux.git && \
+    git clone https://github.com/crystian/ComfyUI-Crystools.git && \
+    git clone https://github.com/Kosinkadink/ComfyUI-VideoHelperSuite.git && \
+    git clone https://github.com/willmiao/ComfyUI-Lora-Manager.git && \
+    git clone https://github.com/city96/ComfyUI-GGUF.git && \
+    git clone https://github.com/Fannovel16/ComfyUI-Frame-Interpolation.git && \
+    git clone https://github.com/nunchaku-tech/ComfyUI-nunchaku.git && \
+    git clone https://github.com/evanspearman/ComfyMath.git && \
+    git clone https://github.com/ssitu/ComfyUI_UltimateSDUpscale.git
+
+# Install custom node dependencies
+RUN cd ComfyUI-KJNodes && pip install -r requirements.txt || true
+RUN cd comfyui_controlnet_aux && pip install -r requirements.txt || true
+RUN cd ComfyUI-VideoHelperSuite && pip install -r requirements.txt || true
+RUN cd ComfyUI-GGUF && pip install -r requirements.txt || true
+RUN cd ComfyUI-Frame-Interpolation && pip install -r requirements.txt || true
+RUN cd ComfyUI-nunchaku && pip install -r requirements.txt || true
+
+# Create directories and symlinks to network volume
+WORKDIR /workspace/ComfyUI
+RUN mkdir -p /userdata/models/checkpoints \
+    /userdata/models/loras \
+    /userdata/models/vae \
+    /userdata/models/controlnet \
+    /userdata/models/clip \
+    /userdata/models/upscale_models \
+    /userdata/.cache/huggingface \
+    /workspace/.cache
+
+# Symlink model directories to /userdata
+RUN rm -rf models/checkpoints && ln -s /userdata/models/checkpoints models/checkpoints && \
+    rm -rf models/loras && ln -s /userdata/models/loras models/loras && \
+    rm -rf models/vae && ln -s /userdata/models/vae models/vae && \
+    rm -rf models/controlnet && ln -s /userdata/models/controlnet models/controlnet && \
+    rm -rf models/clip && ln -s /userdata/models/clip models/clip && \
+    rm -rf models/upscale_models && ln -s /userdata/models/upscale_models models/upscale_models
+
+# Symlink HuggingFace cache
+RUN ln -s /userdata/.cache/huggingface /workspace/.cache/huggingface
+
+# Copy handler
+WORKDIR /workspace
+COPY handler.py /workspace/handler.py
+
+# RunPod handler entrypoint
+CMD ["python", "-u", "handler.py"]
--- a/PROJECT.md
+++ b/PROJECT.md
@@ -0,0 +1,163 @@
+# ComfyUI RunPod Serverless Project
+
+## Project Overview
+Build a RunPod Serverless endpoint running ComfyUI with SageAttention 2.2 for image/video generation. Self-hosted frontend will call the RunPod API.
+
+## Architecture
+- **RunPod Serverless**: Hosts ComfyUI worker with GPU inference
+- **Network Volume**: Mounts at `/userdata`
+- **Gitea Registry**: Hosts Docker image
+- **Frontend**: Self-hosted on home server, calls RunPod API over HTTPS
+
+## Reference Environment (extracted from working pod)
+
+### Base System
+- Ubuntu 22.04.5 LTS (Jammy)
+- Python 3.12.12
+- CUDA 12.8 (nvcc 12.8.93)
+- cuDNN 9.8.0.87
+- NCCL 2.25.1
+
+### PyTorch Stack
+- torch==2.8.0+cu128
+- torchvision==0.23.0+cu128
+- torchaudio==2.8.0+cu128
+- triton==3.4.0
+
+### Key Dependencies
+- transformers==4.56.2
+- diffusers==0.35.2
+- accelerate==1.11.0
+- safetensors==0.6.2
+- onnxruntime-gpu==1.23.2
+- opencv-python==4.12.0.88
+- mediapipe==0.10.14
+- insightface==0.7.3
+- spandrel==0.4.1
+- kornia==0.8.2
+- einops==0.8.1
+- timm==1.0.22
+- peft==0.17.1
+- gguf==0.17.1
+- av==16.0.1 (video)
+- imageio-ffmpeg==0.6.0
+
+### Nunchaku (prebuilt wheel)
+```
+nunchaku @ https://github.com/nunchaku-tech/nunchaku/releases/download/v1.0.2/nunchaku-1.0.2+torch2.8-cp312-cp312-linux_x86_64.whl
+```
+
+### ComfyUI
+- Location: `/workspace/ComfyUI`
+- Uses venv at `/workspace/ComfyUI/venv`
+- Commit: 532e2850794c7b497174a0a42ac0cb1fe5b62499 (Dec 24, 2025)
+
+### Custom Nodes (from CUSTOM_NODES env var + actual install)
+```
+ltdrdata/ComfyUI-Manager
+jnxmx/ComfyUI_HuggingFace_Downloader
+kijai/ComfyUI-KJNodes
+Fannovel16/comfyui_controlnet_aux
+crystian/ComfyUI-Crystools
+Kosinkadink/ComfyUI-VideoHelperSuite
+willmiao/ComfyUI-Lora-Manager
+city96/ComfyUI-GGUF
+Fannovel16/ComfyUI-Frame-Interpolation
+nunchaku-tech/ComfyUI-nunchaku
+evanspearman/ComfyMath
+ssitu/ComfyUI_UltimateSDUpscale
+```
+
+### Environment Variables (relevant)
+```bash
+HF_HOME=/workspace/.cache/huggingface
+HF_HUB_ENABLE_HF_TRANSFER=1
+TRANSFORMERS_CACHE=/workspace/.cache/huggingface/transformers
+PYTHONUNBUFFERED=1
+LD_LIBRARY_PATH=/usr/local/cuda/lib64
+LIBRARY_PATH=/usr/local/cuda/lib64/stubs
+```
+
+### Network Volume Mount
+- Mount point: `/userdata`
+
+## Technical Requirements
+
+### SageAttention 2.2 (Critical)
+Must be compiled from source with no build isolation:
+```bash
+git clone https://github.com/thu-ml/SageAttention.git
+cd SageAttention
+pip install triton
+export EXT_PARALLEL=4 NVCC_APPEND_FLAGS="--threads 8" MAX_JOBS=32
+pip install --no-build-isolation -e .
+```
+
+### Network Volume Structure
+```
+/userdata/
+├── models/
+│   ├── checkpoints/
+│   ├── loras/
+│   ├── vae/
+│   ├── controlnet/
+│   ├── clip/
+│   └── upscale_models/
+└── .cache/
+    └── huggingface/
+```
+
+### Handler Requirements
+- Accept JSON input: `{"image": "base64", "prompt": "string", "workflow": {}}`
+- Image upload to ComfyUI if provided
+- Inject prompt into workflow at specified node
+- Queue workflow, poll for completion
+- Return output as base64:
+  - Images: PNG/JPEG base64
+  - Videos: MP4 base64 (or presigned URL if >10MB)
+- Detect output type from workflow output node
+- Timeout handling (max 600s for video generation)
+
+### Dockerfile Requirements
+- Base: `nvidia/cuda:12.8.1-devel-ubuntu22.04` (or equivalent with CUDA 12.8 devel)
+- Python 3.12
+- PyTorch 2.8.0+cu128 from pytorch index
+- Install nunchaku from GitHub wheel
+- Compile SageAttention with --no-build-isolation
+- Symlink model directories to /userdata
+- Clone and install all custom nodes
+- Install ffmpeg for video handling
+- Expose handler as entrypoint
+
+## File Structure
+```
+/project
+├── Dockerfile
+├── handler.py
+├── requirements.txt
+├── scripts/
+│   └── install_custom_nodes.sh
+├── workflows/
+│   └── default_workflow_api.json
+└── README.md
+```
+
+## Tasks
+1. Create Dockerfile matching reference environment (CUDA 12.8, Python 3.12, PyTorch 2.8)
+2. Create requirements.txt from extracted pip freeze (pruned to essentials)
+3. Create install_custom_nodes.sh for all listed custom nodes
+4. Create handler.py with ComfyUI API integration (image + video output support)
+5. Document deployment steps in README.md
+
+## Notes
+- Nick is a Principal Systems Engineer, prefers direct technical communication
+- Target deployment: RunPod Serverless with 5090 GPU
+- Development machine: RTX 3080 (forward compatible)
+- Registry: Self-hosted Gitea
+- Output will likely be video - ensure ffmpeg installed and handler detects output type
+- Reference pod uses venv - serverless image can install globally
+
+## Claude Code Init Command
+```
+Read PROJECT.md fully. Build the Dockerfile first, matching the reference environment exactly: CUDA 12.8.1, Python 3.12, PyTorch 2.8.0+cu128, triton 3.4.0. Install nunchaku from the GitHub wheel URL. Compile SageAttention 2.2 with --no-build-isolation. Install all custom nodes listed. Symlink model paths to /userdata. Do not use a venv in the container.
+```
--- a/README.md
+++ b/README.md
@@ -0,0 +1,279 @@
+# ComfyUI RunPod Serverless
+
+RunPod Serverless endpoint for ComfyUI with SageAttention 2.2, supporting image and video generation workflows.
+
+## Stack
+
+- CUDA 12.8.1 / Ubuntu 22.04
+- Python 3.12
+- PyTorch 2.8.0+cu128
+- SageAttention 2.2 (compiled)
+- Nunchaku 1.0.2
+- 12 custom nodes pre-installed
+
+## Prerequisites
+
+- Docker with NVIDIA runtime
+- RunPod account with API key
+- Network volume created in RunPod
+- Container registry (Docker Hub, Gitea, etc.)
+
+## Build
+
+```bash
+docker build -t comfyui-runpod:latest .
+```
+
+Build with specific platform (if building on ARM):
+
+```bash
+docker build --platform linux/amd64 -t comfyui-runpod:latest .
+```
+
+## Push to Registry
+
+Docker Hub:
+
+```bash
+docker tag comfyui-runpod:latest yourusername/comfyui-runpod:latest
+docker push yourusername/comfyui-runpod:latest
+```
+
+Self-hosted Gitea:
+
+```bash
+docker tag comfyui-runpod:latest git.yourdomain.com/username/comfyui-runpod:latest
+docker push git.yourdomain.com/username/comfyui-runpod:latest
+```
+
+## Network Volume Setup
+
+Create a network volume in RunPod and populate with models:
+
+```
+/userdata/
+├── models/
+│   ├── checkpoints/     # SD, SDXL, Flux models
+│   ├── loras/           # LoRA models
+│   ├── vae/             # VAE models
+│   ├── controlnet/      # ControlNet models
+│   ├── clip/            # CLIP models
+│   └── upscale_models/  # Upscaler models
+└── .cache/
+    └── huggingface/     # HF model cache
+```
+
+Upload models via RunPod pod or rclone to the network volume before deploying serverless.
+
+## RunPod Deployment
+
+1. Go to RunPod Console > Serverless > New Endpoint
+
+2. Configure endpoint:
+   - **Container Image**: `yourusername/comfyui-runpod:latest`
+   - **GPU**: RTX 4090, RTX 5090, or A100 recommended
+   - **Network Volume**: Select your volume (mounts at `/userdata`)
+   - **Active Workers**: 0 (scale to zero)
+   - **Max Workers**: Based on budget
+   - **Idle Timeout**: 5-10 seconds
+   - **Execution Timeout**: 600 seconds (for video)
+
+3. Deploy and note the Endpoint ID
+
+## API Usage
+
+### Endpoint URL
+
+```
+https://api.runpod.ai/v2/{ENDPOINT_ID}/runsync
+```
+
+### Headers
+
+```
+Authorization: Bearer {RUNPOD_API_KEY}
+Content-Type: application/json
+```
+
+### Request Schema
+
+```json
+{
+  "input": {
+    "workflow": {},
+    "prompt": "optional prompt text",
+    "image": "optional base64 image",
+    "prompt_node_id": "optional node id for prompt",
+    "image_node_id": "optional node id for image",
+    "timeout": 300
+  }
+}
+```
+
+### Example: Text-to-Image
+
+```bash
+curl -X POST "https://api.runpod.ai/v2/${ENDPOINT_ID}/runsync" \
+  -H "Authorization: Bearer ${RUNPOD_API_KEY}" \
+  -H "Content-Type: application/json" \
+  -d '{
+    "input": {
+      "workflow": '"$(cat workflow_api.json)"',
+      "prompt": "a photo of a cat in space"
+    }
+  }'
+```
+
+### Example: Image-to-Video
+
+```bash
+curl -X POST "https://api.runpod.ai/v2/${ENDPOINT_ID}/runsync" \
+  -H "Authorization: Bearer ${RUNPOD_API_KEY}" \
+  -H "Content-Type: application/json" \
+  -d '{
+    "input": {
+      "workflow": '"$(cat i2v_workflow_api.json)"',
+      "image": "'"$(base64 -w0 input.png)"'",
+      "prompt": "the cat walks forward",
+      "timeout": 600
+    }
+  }'
+```
+
+### Response Schema
+
+Success:
+
+```json
+{
+  "id": "job-id",
+  "status": "COMPLETED",
+  "output": {
+    "status": "success",
+    "prompt_id": "abc123",
+    "outputs": [
+      {
+        "type": "video",
+        "filename": "output.mp4",
+        "data": "base64...",
+        "size": 1234567
+      }
+    ]
+  }
+}
+```
+
+Large files (>10MB video):
+
+```json
+{
+  "outputs": [
+    {
+      "type": "video",
+      "filename": "output.mp4",
+      "path": "/userdata/outputs/output.mp4",
+      "size": 52428800
+    }
+  ]
+}
+```
+
+Error:
+
+```json
+{
+  "output": {
+    "error": "error message",
+    "status": "error"
+  }
+}
+```
+
+## Async Execution
+
+For long-running video jobs, use async endpoint:
+
+```bash
+# Submit job
+curl -X POST "https://api.runpod.ai/v2/${ENDPOINT_ID}/run" \
+  -H "Authorization: Bearer ${RUNPOD_API_KEY}" \
+  -H "Content-Type: application/json" \
+  -d '{"input": {...}}'
+
+# Response: {"id": "job-id", "status": "IN_QUEUE"}
+
+# Poll for result
+curl "https://api.runpod.ai/v2/${ENDPOINT_ID}/status/${JOB_ID}" \
+  -H "Authorization: Bearer ${RUNPOD_API_KEY}"
+```
+
+## Workflow Export
+
+Export workflows from ComfyUI in API format:
+
+1. Open ComfyUI
+2. Enable Dev Mode in settings
+3. Click "Save (API Format)"
+4. Use the exported JSON as the `workflow` parameter
+
+## Custom Nodes Included
+
+- ComfyUI-Manager
+- ComfyUI_HuggingFace_Downloader
+- ComfyUI-KJNodes
+- comfyui_controlnet_aux
+- ComfyUI-Crystools
+- ComfyUI-VideoHelperSuite
+- ComfyUI-Lora-Manager
+- ComfyUI-GGUF
+- ComfyUI-Frame-Interpolation
+- ComfyUI-nunchaku
+- ComfyMath
+- ComfyUI_UltimateSDUpscale
+
+## Troubleshooting
+
+### Cold Start Timeout
+
+First request starts ComfyUI server (~30-60s). Increase idle timeout or use warm workers.
+
+### Out of Memory
+
+Reduce batch size or resolution in workflow. Use GGUF quantized models for large models.
+
+### Model Not Found
+
+Ensure models are uploaded to correct `/userdata/models/` subdirectory matching ComfyUI folder structure.
+
+### Video Generation Timeout
+
+Default max is 600s. For longer videos, split into segments or increase resolution/reduce frames.
+
+### Connection Refused
+
+ComfyUI server may have crashed. Check logs in RunPod console. Ensure workflow is valid.
+
+## Local Testing
+
+```bash
+# Build
+docker build -t comfyui-runpod:latest .
+
+# Run with GPU
+docker run --gpus all -p 8188:8188 \
+  -v /path/to/models:/userdata/models \
+  comfyui-runpod:latest
+
+# Test handler
+curl -X POST http://localhost:8188/runsync \
+  -H "Content-Type: application/json" \
+  -d '{"input": {"workflow": {...}}}'
+```
+
+## Environment Variables
+
+| Variable | Default | Description |
+|----------|---------|-------------|
+| `HF_HOME` | `/workspace/.cache/huggingface` | HuggingFace cache |
+| `HF_HUB_ENABLE_HF_TRANSFER` | `1` | Fast HF downloads |
+| `PYTHONUNBUFFERED` | `1` | Realtime logs |
--- a/handler.py
+++ b/handler.py
@@ -0,0 +1,371 @@
+"""
+ComfyUI RunPod Serverless Handler
+Handles image/video generation workflows with ComfyUI API
+"""
+
+import os
+import sys
+import json
+import time
+import base64
+import uuid
+import subprocess
+import signal
+import requests
+from pathlib import Path
+from urllib.parse import urljoin
+import runpod
+
+# Configuration
+COMFYUI_DIR = "/workspace/ComfyUI"
+COMFYUI_PORT = 8188
+COMFYUI_HOST = f"http://127.0.0.1:{COMFYUI_PORT}"
+MAX_TIMEOUT = 600  # 10 minutes max for video generation
+POLL_INTERVAL = 1.0
+STARTUP_TIMEOUT = 120
+
+# Global ComfyUI process
+comfyui_process = None
+
+
+def start_comfyui():
+    """Start ComfyUI server if not already running."""
+    global comfyui_process
+
+    if comfyui_process is not None and comfyui_process.poll() is None:
+        return True
+
+    print("Starting ComfyUI server...")
+
+    comfyui_process = subprocess.Popen(
+        [
+            sys.executable, "main.py",
+            "--listen", "127.0.0.1",
+            "--port", str(COMFYUI_PORT),
+            "--disable-auto-launch"
+        ],
+        cwd=COMFYUI_DIR,
+        stdout=subprocess.PIPE,
+        stderr=subprocess.STDOUT,
+        preexec_fn=os.setsid if hasattr(os, 'setsid') else None
+    )
+
+    # Wait for server to be ready
+    start_time = time.time()
+    while time.time() - start_time < STARTUP_TIMEOUT:
+        try:
+            resp = requests.get(f"{COMFYUI_HOST}/system_stats", timeout=2)
+            if resp.status_code == 200:
+                print("ComfyUI server ready")
+                return True
+        except requests.exceptions.RequestException:
+            pass
+        time.sleep(1)
+
+    print("ComfyUI server failed to start")
+    return False
+
+
+def stop_comfyui():
+    """Stop ComfyUI server."""
+    global comfyui_process
+
+    if comfyui_process is not None:
+        try:
+            os.killpg(os.getpgid(comfyui_process.pid), signal.SIGTERM)
+        except (OSError, ProcessLookupError):
+            comfyui_process.terminate()
+        comfyui_process = None
+
+
+def upload_image(image_base64: str, filename: str = None) -> str:
+    """Upload base64 image to ComfyUI and return the filename."""
+    if filename is None:
+        filename = f"input_{uuid.uuid4().hex[:8]}.png"
+
+    # Decode base64
+    image_data = base64.b64decode(image_base64)
+
+    # Upload to ComfyUI
+    files = {
+        "image": (filename, image_data, "image/png"),
+    }
+    data = {
+        "overwrite": "true"
+    }
+
+    resp = requests.post(
+        f"{COMFYUI_HOST}/upload/image",
+        files=files,
+        data=data
+    )
+
+    if resp.status_code != 200:
+        raise Exception(f"Failed to upload image: {resp.text}")
+
+    result = resp.json()
+    return result.get("name", filename)
+
+
+def inject_prompt_into_workflow(workflow: dict, prompt: str, prompt_node_id: str = None) -> dict:
+    """Inject prompt text into workflow at specified node or auto-detect."""
+    workflow = workflow.copy()
+
+    # If specific node ID provided, use it
+    if prompt_node_id and prompt_node_id in workflow:
+        node = workflow[prompt_node_id]
+        if "inputs" in node:
+            # Common prompt input field names
+            for field in ["text", "prompt", "positive", "string"]:
+                if field in node["inputs"]:
+                    node["inputs"][field] = prompt
+                    return workflow
+
+    # Auto-detect: find nodes that look like text/prompt inputs
+    prompt_node_types = [
+        "CLIPTextEncode",
+        "CLIPTextEncodeSDXL",
+        "Text Multiline",
+        "String",
+        "TextInput"
+    ]
+
+    for node_id, node in workflow.items():
+        class_type = node.get("class_type", "")
+        if class_type in prompt_node_types:
+            if "inputs" in node:
+                for field in ["text", "prompt", "positive", "string"]:
+                    if field in node["inputs"]:
+                        # Only inject into positive prompts, skip negative
+                        if "negative" not in node.get("_meta", {}).get("title", "").lower():
+                            node["inputs"][field] = prompt
+                            return workflow
+
+    return workflow
+
+
+def inject_image_into_workflow(workflow: dict, image_filename: str, image_node_id: str = None) -> dict:
+    """Inject uploaded image filename into workflow."""
+    workflow = workflow.copy()
+
+    # If specific node ID provided, use it
+    if image_node_id and image_node_id in workflow:
+        node = workflow[image_node_id]
+        if "inputs" in node:
+            node["inputs"]["image"] = image_filename
+            return workflow
+
+    # Auto-detect: find LoadImage nodes
+    for node_id, node in workflow.items():
+        class_type = node.get("class_type", "")
+        if class_type in ["LoadImage", "LoadImageFromPath"]:
+            if "inputs" in node:
+                node["inputs"]["image"] = image_filename
+                return workflow
+
+    return workflow
+
+
+def queue_workflow(workflow: dict, client_id: str = None) -> str:
+    """Queue workflow and return prompt_id."""
+    if client_id is None:
+        client_id = uuid.uuid4().hex
+
+    payload = {
+        "prompt": workflow,
+        "client_id": client_id
+    }
+
+    resp = requests.post(
+        f"{COMFYUI_HOST}/prompt",
+        json=payload
+    )
+
+    if resp.status_code != 200:
+        raise Exception(f"Failed to queue workflow: {resp.text}")
+
+    result = resp.json()
+    return result["prompt_id"]
+
+
+def get_history(prompt_id: str) -> dict:
+    """Get execution history for a prompt."""
+    resp = requests.get(f"{COMFYUI_HOST}/history/{prompt_id}")
+    if resp.status_code != 200:
+        return {}
+    return resp.json()
+
+
+def poll_for_completion(prompt_id: str, timeout: int = MAX_TIMEOUT) -> dict:
+    """Poll until workflow completes or timeout."""
+    start_time = time.time()
+
+    while time.time() - start_time < timeout:
+        history = get_history(prompt_id)
+
+        if prompt_id in history:
+            status = history[prompt_id].get("status", {})
+
+            # Check for completion
+            if status.get("completed", False):
+                return history[prompt_id]
+
+            # Check for error
+            if status.get("status_str") == "error":
+                raise Exception(f"Workflow execution failed: {status}")
+
+        time.sleep(POLL_INTERVAL)
+
+    raise TimeoutError(f"Workflow execution timed out after {timeout}s")
+
+
+def get_output_files(history: dict) -> list:
+    """Extract output file info from history."""
+    outputs = []
+
+    if "outputs" not in history:
+        return outputs
+
+    for node_id, node_output in history["outputs"].items():
+        # Handle image outputs
+        if "images" in node_output:
+            for img in node_output["images"]:
+                outputs.append({
+                    "type": "image",
+                    "filename": img["filename"],
+                    "subfolder": img.get("subfolder", ""),
+                    "type_folder": img.get("type", "output")
+                })
+
+        # Handle video outputs (VideoHelperSuite and similar)
+        if "gifs" in node_output:
+            for vid in node_output["gifs"]:
+                outputs.append({
+                    "type": "video",
+                    "filename": vid["filename"],
+                    "subfolder": vid.get("subfolder", ""),
+                    "type_folder": vid.get("type", "output")
+                })
+
+        # Handle generic files
+        if "files" in node_output:
+            for f in node_output["files"]:
+                filename = f.get("filename", "")
+                ext = Path(filename).suffix.lower()
+                file_type = "video" if ext in [".mp4", ".webm", ".gif", ".mov"] else "image"
+                outputs.append({
+                    "type": file_type,
+                    "filename": filename,
+                    "subfolder": f.get("subfolder", ""),
+                    "type_folder": f.get("type", "output")
+                })
+
+    return outputs
+
+
+def fetch_output(output_info: dict) -> bytes:
+    """Fetch output file from ComfyUI."""
+    params = {
+        "filename": output_info["filename"],
+        "subfolder": output_info["subfolder"],
+        "type": output_info["type_folder"]
+    }
+
+    resp = requests.get(f"{COMFYUI_HOST}/view", params=params)
+
+    if resp.status_code != 200:
+        raise Exception(f"Failed to fetch output: {resp.status_code}")
+
+    return resp.content
+
+
+def handler(job: dict) -> dict:
+    """RunPod serverless handler."""
+    job_input = job.get("input", {})
+
+    # Validate input
+    workflow = job_input.get("workflow")
+    if not workflow:
+        return {"error": "Missing 'workflow' in input"}
+
+    # Ensure ComfyUI is running
+    if not start_comfyui():
+        return {"error": "Failed to start ComfyUI server"}
+
+    try:
+        # Handle image upload if provided
+        if "image" in job_input and job_input["image"]:
+            image_filename = upload_image(
+                job_input["image"],
+                job_input.get("image_filename")
+            )
+            workflow = inject_image_into_workflow(
+                workflow,
+                image_filename,
+                job_input.get("image_node_id")
+            )
+
+        # Handle prompt injection if provided
+        if "prompt" in job_input and job_input["prompt"]:
+            workflow = inject_prompt_into_workflow(
+                workflow,
+                job_input["prompt"],
+                job_input.get("prompt_node_id")
+            )
+
+        # Queue workflow
+        client_id = uuid.uuid4().hex
+        prompt_id = queue_workflow(workflow, client_id)
+
+        # Poll for completion
+        timeout = min(job_input.get("timeout", MAX_TIMEOUT), MAX_TIMEOUT)
+        history = poll_for_completion(prompt_id, timeout)
+
+        # Get output files
+        outputs = get_output_files(history)
+
+        if not outputs:
+            return {"error": "No outputs generated"}
+
+        # Fetch and encode outputs
+        results = []
+        for output_info in outputs:
+            data = fetch_output(output_info)
+
+            # Check size for video files
+            if output_info["type"] == "video" and len(data) > 10 * 1024 * 1024:
+                # For large videos, save to network volume and return path
+                output_path = Path("/userdata/outputs") / output_info["filename"]
+                output_path.parent.mkdir(parents=True, exist_ok=True)
+                output_path.write_bytes(data)
+                results.append({
+                    "type": output_info["type"],
+                    "filename": output_info["filename"],
+                    "path": str(output_path),
+                    "size": len(data)
+                })
+            else:
+                # Return as base64
+                results.append({
+                    "type": output_info["type"],
+                    "filename": output_info["filename"],
+                    "data": base64.b64encode(data).decode("utf-8"),
+                    "size": len(data)
+                })
+
+        return {
+            "status": "success",
+            "prompt_id": prompt_id,
+            "outputs": results
+        }
+
+    except TimeoutError as e:
+        return {"error": str(e), "status": "timeout"}
+    except Exception as e:
+        return {"error": str(e), "status": "error"}
+
+
+# RunPod serverless entry point
+if __name__ == "__main__":
+    print("Starting ComfyUI RunPod Handler...")
+    runpod.serverless.start({"handler": handler})
--- a/requirements.txt
+++ b/requirements.txt
@@ -0,0 +1,38 @@
+# Core ML dependencies
+transformers==4.56.2
+diffusers==0.35.2
+accelerate==1.11.0
+safetensors==0.6.2
+
+# ONNX runtime
+onnxruntime-gpu==1.23.2
+
+# Image/Video processing
+opencv-python==4.12.0.88
+mediapipe==0.10.14
+insightface==0.7.3
+imageio-ffmpeg==0.6.0
+av==16.0.1
+
+# ML utilities
+spandrel==0.4.1
+kornia==0.8.2
+einops==0.8.1
+timm==1.0.22
+peft==0.17.1
+gguf==0.17.1
+
+# HuggingFace transfer acceleration
+hf-transfer
+
+# RunPod SDK
+runpod
+
+# Additional common dependencies
+numpy
+scipy
+pillow
+tqdm
+requests
+aiohttp
+websocket-client