commit a5adfe060e729e2fa577b42f894e217ccb3b529c
Author: Nick
Date:   Thu Dec 25 21:59:09 2025 +1300

    Initial commit: ComfyUI RunPod Serverless endpoint

    - Dockerfile with CUDA 12.8.1, Python 3.12, PyTorch 2.8.0+cu128
    - SageAttention 2.2 compiled from source
    - Nunchaku wheel installation
    - 12 custom nodes pre-installed
    - Handler with image/video output support
    - Model symlinks to /userdata network volume

    🤖 Generated with [Claude Code](https://claude.com/claude-code)

    Co-Authored-By: Claude Opus 4.5

diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..16db80d
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,127 @@
+# ComfyUI RunPod Serverless - CUDA 12.8.1, Python 3.12, PyTorch 2.8.0
+FROM nvidia/cuda:12.8.1-devel-ubuntu22.04
+
+ENV DEBIAN_FRONTEND=noninteractive
+ENV PYTHONUNBUFFERED=1
+ENV PIP_NO_CACHE_DIR=1
+
+# CUDA environment
+ENV LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH
+ENV LIBRARY_PATH=/usr/local/cuda/lib64/stubs:$LIBRARY_PATH
+ENV PATH=/usr/local/cuda/bin:$PATH
+
+# HuggingFace cache paths (will be symlinked to network volume)
+ENV HF_HOME=/workspace/.cache/huggingface
+ENV HF_HUB_ENABLE_HF_TRANSFER=1
+ENV TRANSFORMERS_CACHE=/workspace/.cache/huggingface/transformers
+
+# Install system dependencies (python3.12 is not in Ubuntu 22.04's default repos, so add the deadsnakes PPA first)
+RUN apt-get update && apt-get install -y software-properties-common && \
+    add-apt-repository -y ppa:deadsnakes/ppa && \
+    apt-get update && apt-get install -y \
+    python3.12 \
+    python3.12-dev \
+    python3.12-venv \
+    python3-pip \
+    git \
+    git-lfs \
+    wget \
+    curl \
+    ffmpeg \
+    libgl1-mesa-glx \
+    libglib2.0-0 \
+    libsm6 \
+    libxext6 \
+    libxrender-dev \
+    libgomp1 \
+    build-essential \
+    ninja-build \
+    && rm -rf /var/lib/apt/lists/*
+
+# Set Python 3.12 as default
+RUN update-alternatives --install /usr/bin/python python /usr/bin/python3.12 1 && \
+    update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.12 1
+
+# Bootstrap pip for Python 3.12 (the distro python3-pip targets the system Python 3.10), then upgrade tooling
+RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.12 && \
+    python -m pip install --upgrade pip setuptools wheel
+
+# Install PyTorch 2.8.0+cu128 and triton 3.4.0
+RUN pip install \
+    torch==2.8.0+cu128 \
+    torchvision==0.23.0+cu128 \
+    torchaudio==2.8.0+cu128 \
+    --index-url https://download.pytorch.org/whl/cu128
+
+RUN pip install triton==3.4.0
+
+# Install nunchaku from GitHub wheel
+RUN pip install https://github.com/nunchaku-tech/nunchaku/releases/download/v1.0.2/nunchaku-1.0.2+torch2.8-cp312-cp312-linux_x86_64.whl
+
+# Install key dependencies before SageAttention
+COPY requirements.txt /tmp/requirements.txt
+RUN pip install -r /tmp/requirements.txt
+
+# Compile SageAttention 2.2 from source with no build isolation
+WORKDIR /tmp
+ENV EXT_PARALLEL=4
+ENV NVCC_APPEND_FLAGS="--threads 8"
+ENV MAX_JOBS=32
+RUN git clone https://github.com/thu-ml/SageAttention.git && \
+    cd SageAttention && \
+    pip install --no-build-isolation -e .
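+
+# Optional sanity check (an assumption, not part of the original build): uncomment to fail the build
+# early if the compiled SageAttention extension cannot be imported against the installed torch stack.
+# RUN python -c "import torch, sageattention; print(torch.__version__)"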
+
+# Clone ComfyUI
+WORKDIR /workspace
+RUN git clone https://github.com/comfyanonymous/ComfyUI.git && \
+    cd ComfyUI && \
+    pip install -r requirements.txt
+
+# Install custom nodes
+WORKDIR /workspace/ComfyUI/custom_nodes
+RUN git clone https://github.com/ltdrdata/ComfyUI-Manager.git && \
+    git clone https://github.com/jnxmx/ComfyUI_HuggingFace_Downloader.git && \
+    git clone https://github.com/kijai/ComfyUI-KJNodes.git && \
+    git clone https://github.com/Fannovel16/comfyui_controlnet_aux.git && \
+    git clone https://github.com/crystian/ComfyUI-Crystools.git && \
+    git clone https://github.com/Kosinkadink/ComfyUI-VideoHelperSuite.git && \
+    git clone https://github.com/willmiao/ComfyUI-Lora-Manager.git && \
+    git clone https://github.com/city96/ComfyUI-GGUF.git && \
+    git clone https://github.com/Fannovel16/ComfyUI-Frame-Interpolation.git && \
+    git clone https://github.com/nunchaku-tech/ComfyUI-nunchaku.git && \
+    git clone https://github.com/evanspearman/ComfyMath.git && \
+    git clone https://github.com/ssitu/ComfyUI_UltimateSDUpscale.git
+
+# Install custom node dependencies
+RUN cd ComfyUI-KJNodes && pip install -r requirements.txt || true
+RUN cd comfyui_controlnet_aux && pip install -r requirements.txt || true
+RUN cd ComfyUI-VideoHelperSuite && pip install -r requirements.txt || true
+RUN cd ComfyUI-GGUF && pip install -r requirements.txt || true
+RUN cd ComfyUI-Frame-Interpolation && pip install -r requirements.txt || true
+RUN cd ComfyUI-nunchaku && pip install -r requirements.txt || true
+
+# Create directories and symlinks to network volume
+WORKDIR /workspace/ComfyUI
+RUN mkdir -p /userdata/models/checkpoints \
+    /userdata/models/loras \
+    /userdata/models/vae \
+    /userdata/models/controlnet \
+    /userdata/models/clip \
+    /userdata/models/upscale_models \
+    /userdata/.cache/huggingface \
+    /workspace/.cache
+
+# Symlink model directories to /userdata
+RUN rm -rf models/checkpoints && ln -s /userdata/models/checkpoints models/checkpoints && \
+    rm -rf models/loras && ln -s /userdata/models/loras models/loras && \
+    rm -rf models/vae && ln -s /userdata/models/vae models/vae && \
+    rm -rf models/controlnet && ln -s /userdata/models/controlnet models/controlnet && \
+    rm -rf models/clip && ln -s /userdata/models/clip models/clip && \
+    rm -rf models/upscale_models && ln -s /userdata/models/upscale_models models/upscale_models
+
+# Symlink HuggingFace cache
+RUN ln -s /userdata/.cache/huggingface /workspace/.cache/huggingface
+
+# Copy handler
+WORKDIR /workspace
+COPY handler.py /workspace/handler.py
+
+# RunPod handler entrypoint
+CMD ["python", "-u", "handler.py"]
diff --git a/PROJECT.md b/PROJECT.md
new file mode 100644
index 0000000..b6ae73a
--- /dev/null
+++ b/PROJECT.md
@@ -0,0 +1,163 @@
+# ComfyUI RunPod Serverless Project
+
+## Project Overview
+Build a RunPod Serverless endpoint running ComfyUI with SageAttention 2.2 for image/video generation. Self-hosted frontend will call the RunPod API.
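+
+A minimal sketch of that frontend call (an assumption for illustration, not part of the reference
+environment: it uses `requests`, takes the endpoint ID and API key from placeholder environment
+variables, and follows the request/response shape documented for the handler below):
+
+```python
+# Sketch of the frontend-side call to the serverless endpoint (placeholder names)
+import os
+import requests
+
+RUNPOD_API_KEY = os.environ["RUNPOD_API_KEY"]
+ENDPOINT_ID = os.environ["RUNPOD_ENDPOINT_ID"]
+
+
+def run_workflow(workflow: dict, prompt: str | None = None, timeout: int = 300) -> dict:
+    """Submit a ComfyUI workflow to the RunPod endpoint and return the handler output."""
+    resp = requests.post(
+        f"https://api.runpod.ai/v2/{ENDPOINT_ID}/runsync",
+        headers={"Authorization": f"Bearer {RUNPOD_API_KEY}"},
+        json={"input": {"workflow": workflow, "prompt": prompt, "timeout": timeout}},
+        timeout=timeout + 60,  # allow extra time for queueing and cold start
+    )
+    resp.raise_for_status()
+    return resp.json().get("output", {})
+```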
+
+## Architecture
+- **RunPod Serverless**: Hosts ComfyUI worker with GPU inference
+- **Network Volume**: Mounts at `/userdata`
+- **Gitea Registry**: Hosts Docker image
+- **Frontend**: Self-hosted on home server, calls RunPod API over HTTPS
+
+## Reference Environment (extracted from working pod)
+
+### Base System
+- Ubuntu 22.04.5 LTS (Jammy)
+- Python 3.12.12
+- CUDA 12.8 (nvcc 12.8.93)
+- cuDNN 9.8.0.87
+- NCCL 2.25.1
+
+### PyTorch Stack
+- torch==2.8.0+cu128
+- torchvision==0.23.0+cu128
+- torchaudio==2.8.0+cu128
+- triton==3.4.0
+
+### Key Dependencies
+- transformers==4.56.2
+- diffusers==0.35.2
+- accelerate==1.11.0
+- safetensors==0.6.2
+- onnxruntime-gpu==1.23.2
+- opencv-python==4.12.0.88
+- mediapipe==0.10.14
+- insightface==0.7.3
+- spandrel==0.4.1
+- kornia==0.8.2
+- einops==0.8.1
+- timm==1.0.22
+- peft==0.17.1
+- gguf==0.17.1
+- av==16.0.1 (video)
+- imageio-ffmpeg==0.6.0
+
+### Nunchaku (prebuilt wheel)
+```
+nunchaku @ https://github.com/nunchaku-tech/nunchaku/releases/download/v1.0.2/nunchaku-1.0.2+torch2.8-cp312-cp312-linux_x86_64.whl
+```
+
+### ComfyUI
+- Location: `/workspace/ComfyUI`
+- Uses venv at `/workspace/ComfyUI/venv`
+- Commit: 532e2850794c7b497174a0a42ac0cb1fe5b62499 (Dec 24, 2025)
+
+### Custom Nodes (from CUSTOM_NODES env var + actual install)
+```
+ltdrdata/ComfyUI-Manager
+jnxmx/ComfyUI_HuggingFace_Downloader
+kijai/ComfyUI-KJNodes
+Fannovel16/comfyui_controlnet_aux
+crystian/ComfyUI-Crystools
+Kosinkadink/ComfyUI-VideoHelperSuite
+willmiao/ComfyUI-Lora-Manager
+city96/ComfyUI-GGUF
+Fannovel16/ComfyUI-Frame-Interpolation
+nunchaku-tech/ComfyUI-nunchaku
+evanspearman/ComfyMath
+ssitu/ComfyUI_UltimateSDUpscale
+```
+
+### Environment Variables (relevant)
+```bash
+HF_HOME=/workspace/.cache/huggingface
+HF_HUB_ENABLE_HF_TRANSFER=1
+TRANSFORMERS_CACHE=/workspace/.cache/huggingface/transformers
+PYTHONUNBUFFERED=1
+LD_LIBRARY_PATH=/usr/local/cuda/lib64
+LIBRARY_PATH=/usr/local/cuda/lib64/stubs
+```
+
+### Network Volume Mount
+- Mount point: `/userdata`
+
+## Technical Requirements
+
+### SageAttention 2.2 (Critical)
+Must be compiled from source with no build isolation:
+```bash
+git clone https://github.com/thu-ml/SageAttention.git
+cd SageAttention
+pip install triton
+export EXT_PARALLEL=4 NVCC_APPEND_FLAGS="--threads 8" MAX_JOBS=32
+pip install --no-build-isolation -e .
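+
+# Optional check (assumption, not part of the reference steps): confirm the compiled extension imports
+python -c "import sageattention; print(sageattention.__file__)"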
+``` + +### Network Volume Structure +``` +/userdata/ +├── models/ +│ ├── checkpoints/ +│ ├── loras/ +│ ├── vae/ +│ ├── controlnet/ +│ ├── clip/ +│ └── upscale_models/ +└── .cache/ + └── huggingface/ +``` + +### Handler Requirements +- Accept JSON input: `{"image": "base64", "prompt": "string", "workflow": {}}` +- Image upload to ComfyUI if provided +- Inject prompt into workflow at specified node +- Queue workflow, poll for completion +- Return output as base64: + - Images: PNG/JPEG base64 + - Videos: MP4 base64 (or presigned URL if >10MB) +- Detect output type from workflow output node +- Timeout handling (max 600s for video generation) + +### Dockerfile Requirements +- Base: `nvidia/cuda:12.8.1-devel-ubuntu22.04` (or equivalent with CUDA 12.8 devel) +- Python 3.12 +- PyTorch 2.8.0+cu128 from pytorch index +- Install nunchaku from GitHub wheel +- Compile SageAttention with --no-build-isolation +- Symlink model directories to /userdata +- Clone and install all custom nodes +- Install ffmpeg for video handling +- Expose handler as entrypoint + +## File Structure +``` +/project +├── Dockerfile +├── handler.py +├── requirements.txt +├── scripts/ +│ └── install_custom_nodes.sh +├── workflows/ +│ └── default_workflow_api.json +└── README.md +``` + +## Tasks +1. Create Dockerfile matching reference environment (CUDA 12.8, Python 3.12, PyTorch 2.8) +2. Create requirements.txt from extracted pip freeze (pruned to essentials) +3. Create install_custom_nodes.sh for all listed custom nodes +4. Create handler.py with ComfyUI API integration (image + video output support) +5. Document deployment steps in README.md + +## Notes +- Nick is a Principal Systems Engineer, prefers direct technical communication +- Target deployment: RunPod Serverless with 5090 GPU +- Development machine: RTX 3080 (forward compatible) +- Registry: Self-hosted Gitea +- Output will likely be video - ensure ffmpeg installed and handler detects output type +- Reference pod uses venv - serverless image can install globally + +## Claude Code Init Command +``` +Read PROJECT.md fully. Build the Dockerfile first, matching the reference environment exactly: CUDA 12.8.1, Python 3.12, PyTorch 2.8.0+cu128, triton 3.4.0. Install nunchaku from the GitHub wheel URL. Compile SageAttention 2.2 with --no-build-isolation. Install all custom nodes listed. Symlink model paths to /userdata. Do not use a venv in the container. +``` diff --git a/README.md b/README.md new file mode 100644 index 0000000..14bc702 --- /dev/null +++ b/README.md @@ -0,0 +1,279 @@ +# ComfyUI RunPod Serverless + +RunPod Serverless endpoint for ComfyUI with SageAttention 2.2, supporting image and video generation workflows. + +## Stack + +- CUDA 12.8.1 / Ubuntu 22.04 +- Python 3.12 +- PyTorch 2.8.0+cu128 +- SageAttention 2.2 (compiled) +- Nunchaku 1.0.2 +- 12 custom nodes pre-installed + +## Prerequisites + +- Docker with NVIDIA runtime +- RunPod account with API key +- Network volume created in RunPod +- Container registry (Docker Hub, Gitea, etc.) + +## Build + +```bash +docker build -t comfyui-runpod:latest . +``` + +Build with specific platform (if building on ARM): + +```bash +docker build --platform linux/amd64 -t comfyui-runpod:latest . 
+``` + +## Push to Registry + +Docker Hub: + +```bash +docker tag comfyui-runpod:latest yourusername/comfyui-runpod:latest +docker push yourusername/comfyui-runpod:latest +``` + +Self-hosted Gitea: + +```bash +docker tag comfyui-runpod:latest git.yourdomain.com/username/comfyui-runpod:latest +docker push git.yourdomain.com/username/comfyui-runpod:latest +``` + +## Network Volume Setup + +Create a network volume in RunPod and populate with models: + +``` +/userdata/ +├── models/ +│ ├── checkpoints/ # SD, SDXL, Flux models +│ ├── loras/ # LoRA models +│ ├── vae/ # VAE models +│ ├── controlnet/ # ControlNet models +│ ├── clip/ # CLIP models +│ └── upscale_models/ # Upscaler models +└── .cache/ + └── huggingface/ # HF model cache +``` + +Upload models via RunPod pod or rclone to the network volume before deploying serverless. + +## RunPod Deployment + +1. Go to RunPod Console > Serverless > New Endpoint + +2. Configure endpoint: + - **Container Image**: `yourusername/comfyui-runpod:latest` + - **GPU**: RTX 4090, RTX 5090, or A100 recommended + - **Network Volume**: Select your volume (mounts at `/userdata`) + - **Active Workers**: 0 (scale to zero) + - **Max Workers**: Based on budget + - **Idle Timeout**: 5-10 seconds + - **Execution Timeout**: 600 seconds (for video) + +3. Deploy and note the Endpoint ID + +## API Usage + +### Endpoint URL + +``` +https://api.runpod.ai/v2/{ENDPOINT_ID}/runsync +``` + +### Headers + +``` +Authorization: Bearer {RUNPOD_API_KEY} +Content-Type: application/json +``` + +### Request Schema + +```json +{ + "input": { + "workflow": {}, + "prompt": "optional prompt text", + "image": "optional base64 image", + "prompt_node_id": "optional node id for prompt", + "image_node_id": "optional node id for image", + "timeout": 300 + } +} +``` + +### Example: Text-to-Image + +```bash +curl -X POST "https://api.runpod.ai/v2/${ENDPOINT_ID}/runsync" \ + -H "Authorization: Bearer ${RUNPOD_API_KEY}" \ + -H "Content-Type: application/json" \ + -d '{ + "input": { + "workflow": '"$(cat workflow_api.json)"', + "prompt": "a photo of a cat in space" + } + }' +``` + +### Example: Image-to-Video + +```bash +curl -X POST "https://api.runpod.ai/v2/${ENDPOINT_ID}/runsync" \ + -H "Authorization: Bearer ${RUNPOD_API_KEY}" \ + -H "Content-Type: application/json" \ + -d '{ + "input": { + "workflow": '"$(cat i2v_workflow_api.json)"', + "image": "'"$(base64 -w0 input.png)"'", + "prompt": "the cat walks forward", + "timeout": 600 + } + }' +``` + +### Response Schema + +Success: + +```json +{ + "id": "job-id", + "status": "COMPLETED", + "output": { + "status": "success", + "prompt_id": "abc123", + "outputs": [ + { + "type": "video", + "filename": "output.mp4", + "data": "base64...", + "size": 1234567 + } + ] + } +} +``` + +Large files (>10MB video): + +```json +{ + "outputs": [ + { + "type": "video", + "filename": "output.mp4", + "path": "/userdata/outputs/output.mp4", + "size": 52428800 + } + ] +} +``` + +Error: + +```json +{ + "output": { + "error": "error message", + "status": "error" + } +} +``` + +## Async Execution + +For long-running video jobs, use async endpoint: + +```bash +# Submit job +curl -X POST "https://api.runpod.ai/v2/${ENDPOINT_ID}/run" \ + -H "Authorization: Bearer ${RUNPOD_API_KEY}" \ + -H "Content-Type: application/json" \ + -d '{"input": {...}}' + +# Response: {"id": "job-id", "status": "IN_QUEUE"} + +# Poll for result +curl "https://api.runpod.ai/v2/${ENDPOINT_ID}/status/${JOB_ID}" \ + -H "Authorization: Bearer ${RUNPOD_API_KEY}" +``` + +## Workflow Export + +Export workflows 
+Export workflows from ComfyUI in API format:
+
+1. Open ComfyUI
+2. Enable Dev Mode in settings
+3. Click "Save (API Format)"
+4. Use the exported JSON as the `workflow` parameter
+
+## Custom Nodes Included
+
+- ComfyUI-Manager
+- ComfyUI_HuggingFace_Downloader
+- ComfyUI-KJNodes
+- comfyui_controlnet_aux
+- ComfyUI-Crystools
+- ComfyUI-VideoHelperSuite
+- ComfyUI-Lora-Manager
+- ComfyUI-GGUF
+- ComfyUI-Frame-Interpolation
+- ComfyUI-nunchaku
+- ComfyMath
+- ComfyUI_UltimateSDUpscale
+
+## Troubleshooting
+
+### Cold Start Timeout
+
+The first request starts the ComfyUI server (~30-60s). Increase the idle timeout or keep warm workers.
+
+### Out of Memory
+
+Reduce batch size or resolution in the workflow. Use GGUF-quantized variants for large models.
+
+### Model Not Found
+
+Ensure models are uploaded to the correct `/userdata/models/` subdirectory matching ComfyUI's folder structure.
+
+### Video Generation Timeout
+
+The default maximum is 600s. For longer videos, split generation into segments, or reduce resolution and frame count.
+
+### Connection Refused
+
+The ComfyUI server may have crashed. Check the logs in the RunPod console and ensure the workflow is valid.
+
+## Local Testing
+
+```bash
+# Build
+docker build -t comfyui-runpod:latest .
+
+# Run the handler once against a test payload (runpod SDK --test_input flag).
+# ComfyUI listens only on 127.0.0.1 inside the container, so it cannot be called directly.
+docker run --gpus all \
+  -v /path/to/models:/userdata/models \
+  comfyui-runpod:latest \
+  python -u handler.py --test_input '{"input": {"workflow": {...}}}'
+```
+
+## Environment Variables
+
+| Variable | Default | Description |
+|----------|---------|-------------|
+| `HF_HOME` | `/workspace/.cache/huggingface` | HuggingFace cache |
+| `HF_HUB_ENABLE_HF_TRANSFER` | `1` | Fast HF downloads |
+| `PYTHONUNBUFFERED` | `1` | Real-time logs |
diff --git a/handler.py b/handler.py
new file mode 100644
index 0000000..ac6d9f3
--- /dev/null
+++ b/handler.py
@@ -0,0 +1,371 @@
+"""
+ComfyUI RunPod Serverless Handler
+Handles image/video generation workflows with ComfyUI API
+"""
+
+import os
+import sys
+import json
+import time
+import base64
+import uuid
+import subprocess
+import signal
+import requests
+from pathlib import Path
+from urllib.parse import urljoin
+import runpod
+
+# Configuration
+COMFYUI_DIR = "/workspace/ComfyUI"
+COMFYUI_PORT = 8188
+COMFYUI_HOST = f"http://127.0.0.1:{COMFYUI_PORT}"
+MAX_TIMEOUT = 600  # 10 minutes max for video generation
+POLL_INTERVAL = 1.0
+STARTUP_TIMEOUT = 120
+
+# Global ComfyUI process
+comfyui_process = None
+
+
+def start_comfyui():
+    """Start ComfyUI server if not already running."""
+    global comfyui_process
+
+    if comfyui_process is not None and comfyui_process.poll() is None:
+        return True
+
+    print("Starting ComfyUI server...")
+
+    comfyui_process = subprocess.Popen(
+        [
+            sys.executable, "main.py",
+            "--listen", "127.0.0.1",
+            "--port", str(COMFYUI_PORT),
+            "--disable-auto-launch"
+        ],
+        cwd=COMFYUI_DIR,
+        # Inherit stdout/stderr so ComfyUI logs show up in the RunPod worker logs;
+        # capturing into an unread PIPE would eventually fill the buffer and stall the server
+        preexec_fn=os.setsid if hasattr(os, 'setsid') else None
+    )
+
+    # Wait for server to be ready
+    start_time = time.time()
+    while time.time() - start_time < STARTUP_TIMEOUT:
+        try:
+            resp = requests.get(f"{COMFYUI_HOST}/system_stats", timeout=2)
+            if resp.status_code == 200:
+                print("ComfyUI server ready")
+                return True
+        except requests.exceptions.RequestException:
+            pass
+        time.sleep(1)
+
+    print("ComfyUI server failed to start")
+    return False
+
+
+def stop_comfyui():
+    """Stop ComfyUI server."""
+    global comfyui_process
+
+    if comfyui_process is not None:
+        try:
+            os.killpg(os.getpgid(comfyui_process.pid), signal.SIGTERM)
+        except (OSError, ProcessLookupError):
+            comfyui_process.terminate()
+        comfyui_process = None
+
+
+def upload_image(image_base64: str, filename: str = None) -> str:
+    """Upload base64 image to ComfyUI and return the filename."""
+    if filename is None:
+        filename = f"input_{uuid.uuid4().hex[:8]}.png"
+
+    # Decode base64
+    image_data = base64.b64decode(image_base64)
+
+    # Upload to ComfyUI
+    files = {
+        "image": (filename, image_data, "image/png"),
+    }
+    data = {
+        "overwrite": "true"
+    }
+
+    resp = requests.post(
+        f"{COMFYUI_HOST}/upload/image",
+        files=files,
+        data=data
+    )
+
+    if resp.status_code != 200:
+        raise Exception(f"Failed to upload image: {resp.text}")
+
+    result = resp.json()
+    return result.get("name", filename)
+
+
+def inject_prompt_into_workflow(workflow: dict, prompt: str, prompt_node_id: str = None) -> dict:
+    """Inject prompt text into workflow at specified node or auto-detect."""
+    workflow = workflow.copy()
+
+    # If specific node ID provided, use it
+    if prompt_node_id and prompt_node_id in workflow:
+        node = workflow[prompt_node_id]
+        if "inputs" in node:
+            # Common prompt input field names
+            for field in ["text", "prompt", "positive", "string"]:
+                if field in node["inputs"]:
+                    node["inputs"][field] = prompt
+        return workflow
+
+    # Auto-detect: find nodes that look like text/prompt inputs
+    prompt_node_types = [
+        "CLIPTextEncode",
+        "CLIPTextEncodeSDXL",
+        "Text Multiline",
+        "String",
+        "TextInput"
+    ]
+
+    for node_id, node in workflow.items():
+        class_type = node.get("class_type", "")
+        if class_type in prompt_node_types:
+            if "inputs" in node:
+                for field in ["text", "prompt", "positive", "string"]:
+                    if field in node["inputs"]:
+                        # Only inject into positive prompts, skip negative
+                        if "negative" not in node.get("_meta", {}).get("title", "").lower():
+                            node["inputs"][field] = prompt
+                            return workflow
+
+    return workflow
+
+
+def inject_image_into_workflow(workflow: dict, image_filename: str, image_node_id: str = None) -> dict:
+    """Inject uploaded image filename into workflow."""
+    workflow = workflow.copy()
+
+    # If specific node ID provided, use it
+    if image_node_id and image_node_id in workflow:
+        node = workflow[image_node_id]
+        if "inputs" in node:
+            node["inputs"]["image"] = image_filename
+        return workflow
+
+    # Auto-detect: find LoadImage nodes
+    for node_id, node in workflow.items():
+        class_type = node.get("class_type", "")
+        if class_type in ["LoadImage", "LoadImageFromPath"]:
+            if "inputs" in node:
+                node["inputs"]["image"] = image_filename
+                return workflow
+
+    return workflow
+
+
+def queue_workflow(workflow: dict, client_id: str = None) -> str:
+    """Queue workflow and return prompt_id."""
+    if client_id is None:
+        client_id = uuid.uuid4().hex
+
+    payload = {
+        "prompt": workflow,
+        "client_id": client_id
+    }
+
+    resp = requests.post(
+        f"{COMFYUI_HOST}/prompt",
+        json=payload
+    )
+
+    if resp.status_code != 200:
+        raise Exception(f"Failed to queue workflow: {resp.text}")
+
+    result = resp.json()
+    return result["prompt_id"]
+
+
+def get_history(prompt_id: str) -> dict:
+    """Get execution history for a prompt."""
+    resp = requests.get(f"{COMFYUI_HOST}/history/{prompt_id}")
+    if resp.status_code != 200:
+        return {}
+    return resp.json()
+
+
+def poll_for_completion(prompt_id: str, timeout: int = MAX_TIMEOUT) -> dict:
+    """Poll until workflow completes or timeout."""
+    start_time = time.time()
+
+    while time.time() - start_time < timeout:
+        history = get_history(prompt_id)
+
+        if prompt_id in history:
+            status = history[prompt_id].get("status", {})
+
+            # Check for completion
+            if status.get("completed", False):
+                return history[prompt_id]
+
+            # Check for error
+            if status.get("status_str") == "error":
+                raise Exception(f"Workflow execution failed: {status}")
+
+        time.sleep(POLL_INTERVAL)
+
+    raise TimeoutError(f"Workflow execution timed out after {timeout}s")
+
+
+def get_output_files(history: dict) -> list:
+    """Extract output file info from history."""
+    outputs = []
+
+    if "outputs" not in history:
+        return outputs
+
+    for node_id, node_output in history["outputs"].items():
+        # Handle image outputs
+        if "images" in node_output:
+            for img in node_output["images"]:
+                outputs.append({
+                    "type": "image",
+                    "filename": img["filename"],
+                    "subfolder": img.get("subfolder", ""),
+                    "type_folder": img.get("type", "output")
+                })
+
+        # Handle video outputs (VideoHelperSuite and similar)
+        if "gifs" in node_output:
+            for vid in node_output["gifs"]:
+                outputs.append({
+                    "type": "video",
+                    "filename": vid["filename"],
+                    "subfolder": vid.get("subfolder", ""),
+                    "type_folder": vid.get("type", "output")
+                })
+
+        # Handle generic files
+        if "files" in node_output:
+            for f in node_output["files"]:
+                filename = f.get("filename", "")
+                ext = Path(filename).suffix.lower()
+                file_type = "video" if ext in [".mp4", ".webm", ".gif", ".mov"] else "image"
+                outputs.append({
+                    "type": file_type,
+                    "filename": filename,
+                    "subfolder": f.get("subfolder", ""),
+                    "type_folder": f.get("type", "output")
+                })
+
+    return outputs
+
+
+def fetch_output(output_info: dict) -> bytes:
+    """Fetch output file from ComfyUI."""
+    params = {
+        "filename": output_info["filename"],
+        "subfolder": output_info["subfolder"],
+        "type": output_info["type_folder"]
+    }
+
+    resp = requests.get(f"{COMFYUI_HOST}/view", params=params)
+
+    if resp.status_code != 200:
+        raise Exception(f"Failed to fetch output: {resp.status_code}")
+
+    return resp.content
+
+
+def handler(job: dict) -> dict:
+    """RunPod serverless handler."""
+    job_input = job.get("input", {})
+
+    # Validate input
+    workflow = job_input.get("workflow")
+    if not workflow:
+        return {"error": "Missing 'workflow' in input"}
+
+    # Ensure ComfyUI is running
+    if not start_comfyui():
+        return {"error": "Failed to start ComfyUI server"}
+
+    try:
+        # Handle image upload if provided
+        if "image" in job_input and job_input["image"]:
+            image_filename = upload_image(
+                job_input["image"],
+                job_input.get("image_filename")
+            )
+            workflow = inject_image_into_workflow(
+                workflow,
+                image_filename,
+                job_input.get("image_node_id")
+            )
+
+        # Handle prompt injection if provided
+        if "prompt" in job_input and job_input["prompt"]:
+            workflow = inject_prompt_into_workflow(
+                workflow,
+                job_input["prompt"],
+                job_input.get("prompt_node_id")
+            )
+
+        # Queue workflow
+        client_id = uuid.uuid4().hex
+        prompt_id = queue_workflow(workflow, client_id)
+
+        # Poll for completion
+        timeout = min(job_input.get("timeout", MAX_TIMEOUT), MAX_TIMEOUT)
+        history = poll_for_completion(prompt_id, timeout)
+
+        # Get output files
+        outputs = get_output_files(history)
+
+        if not outputs:
+            return {"error": "No outputs generated"}
+
+        # Fetch and encode outputs
+        results = []
+        for output_info in outputs:
+            data = fetch_output(output_info)
+
+            # Check size for video files
+            if output_info["type"] == "video" and len(data) > 10 * 1024 * 1024:
+                # For large videos, save to network volume and return path
+                output_path = Path("/userdata/outputs") / output_info["filename"]
+                output_path.parent.mkdir(parents=True, exist_ok=True)
+                output_path.write_bytes(data)
+                results.append({
+                    "type": output_info["type"],
+                    "filename": output_info["filename"],
+                    "path": str(output_path),
+                    "size": len(data)
+                })
+            else:
+                # Return as base64
+                results.append({
+                    "type": output_info["type"],
+                    "filename": output_info["filename"],
+                    "data": base64.b64encode(data).decode("utf-8"),
+                    "size": len(data)
+                })
+
+        return {
+            "status": "success",
+            "prompt_id": prompt_id,
+            "outputs": results
+        }
+
+    except TimeoutError as e:
+        return {"error": str(e), "status": "timeout"}
+    except Exception as e:
+        return {"error": str(e), "status": "error"}
+
+
+# RunPod serverless entry point
+if __name__ == "__main__":
+    print("Starting ComfyUI RunPod Handler...")
+    runpod.serverless.start({"handler": handler})
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..2ec3687
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,38 @@
+# Core ML dependencies
+transformers==4.56.2
+diffusers==0.35.2
+accelerate==1.11.0
+safetensors==0.6.2
+
+# ONNX runtime
+onnxruntime-gpu==1.23.2
+
+# Image/Video processing
+opencv-python==4.12.0.88
+mediapipe==0.10.14
+insightface==0.7.3
+imageio-ffmpeg==0.6.0
+av==16.0.1
+
+# ML utilities
+spandrel==0.4.1
+kornia==0.8.2
+einops==0.8.1
+timm==1.0.22
+peft==0.17.1
+gguf==0.17.1
+
+# HuggingFace transfer acceleration
+hf-transfer
+
+# RunPod SDK
+runpod
+
+# Additional common dependencies
+numpy
+scipy
+pillow
+tqdm
+requests
+aiohttp
+websocket-client