Add ComfyUI output capture for crash debugging

- Add background thread to read ComfyUI stdout in real time
- Store last 200 lines in circular buffer
- Echo output to RunPod logs with [ComfyUI] prefix
- Include last 100 lines in error responses for debugging
- Add comfyui_output field to error responses

This will help diagnose why ComfyUI crashes during generation.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-11 03:15:10 +00:00
parent 672381ddd0
commit 85a07fcc5f
1 changed files with 50 additions and 4 deletions
--- a/handler.py
+++ b/handler.py
@@ -20,6 +20,8 @@ import base64
 import uuid
 import subprocess
 import signal
 import threading
 from collections import deque
 import requests
 from pathlib import Path
 import runpod
@@ -42,8 +44,36 @@ NODE_STEPS = "150"
 NODE_SPLIT_STEP = "151"
 NODE_SAVE_VIDEO = "117"
-# Global ComfyUI process
+# Global ComfyUI process and output capture
 # Handle of the ComfyUI subprocess; None until start_comfyui() assigns the Popen object.
 comfyui_process = None
 # Captured ComfyUI stdout lines; appended by _read_comfyui_output() and cleared by start_comfyui().
 comfyui_output_buffer = deque(maxlen=200)  # Keep last 200 lines
 # Held around every append, clear, and snapshot of comfyui_output_buffer.
 comfyui_output_lock = threading.Lock()
 # Daemon thread running _read_comfyui_output(); None until start_comfyui() creates it.
 comfyui_reader_thread = None
 def _read_comfyui_output():
    """Drain ComfyUI's stdout into the shared buffer (reader-thread target).

    Each line read is decoded as UTF-8 (invalid bytes replaced), stripped of
    trailing whitespace, appended to ``comfyui_output_buffer`` under
    ``comfyui_output_lock``, and echoed with a ``[ComfyUI]`` prefix.

    The loop ends when stdout reaches EOF, when the module-level
    ``comfyui_process`` no longer refers to the process this thread was
    started for, or when reading fails. Nothing is returned or raised.
    """
    # Bind the process this thread belongs to. Re-reading the global on every
    # iteration would let a stale reader latch onto a restarted process's
    # pipe and compete with the new reader for lines.
    proc = comfyui_process
    if proc is None or proc.stdout is None:
        return

    while comfyui_process is proc:
        try:
            line = proc.stdout.readline()
            if not line:
                # readline() yields an empty value only at EOF; nothing more
                # can arrive, so stop instead of spinning until the process
                # is reaped.
                break
            decoded = line.decode('utf-8', errors='replace').rstrip()
            with comfyui_output_lock:
                comfyui_output_buffer.append(decoded)
            print(f"[ComfyUI] {decoded}")  # Echo to RunPod logs
        except Exception as exc:
            # Best-effort capture: never let the reader thread raise, but
            # leave a trace so a dead reader is distinguishable from a
            # silent ComfyUI.
            print(f"[ComfyUI] output reader stopped: {exc!r}")
            break
 def get_comfyui_output(last_n: int = 50) -> list:
    """Return up to the last ``last_n`` captured ComfyUI output lines.

    Args:
        last_n: Maximum number of trailing lines to return. Zero or a
            negative value yields an empty list.

    Returns:
        A new list of lines, oldest first, copied from
        ``comfyui_output_buffer`` while holding ``comfyui_output_lock``.
    """
    # Guard explicitly: lines[-0:] is the whole list and a negative count
    # would slice from the front, neither of which means "last N lines".
    if last_n <= 0:
        return []
    with comfyui_output_lock:
        lines = list(comfyui_output_buffer)
    # Slicing clamps on its own when fewer than last_n lines are stored.
    return lines[-last_n:]
 class JobLogger:
@@ -68,7 +98,7 @@ class JobLogger:
 def start_comfyui(logger: JobLogger = None):
    """Start ComfyUI server if not already running."""
-    global comfyui_process
+    global comfyui_process, comfyui_reader_thread, comfyui_output_buffer
    def log(msg):
        if logger:
@@ -80,6 +110,10 @@ def start_comfyui(logger: JobLogger = None):
        log("ComfyUI server already running")
        return True
    # Clear output buffer for fresh start
    with comfyui_output_lock:
        comfyui_output_buffer.clear()
    log("Starting ComfyUI server...")
    comfyui_process = subprocess.Popen(
@@ -95,6 +129,10 @@ def start_comfyui(logger: JobLogger = None):
        preexec_fn=os.setsid if hasattr(os, 'setsid') else None
    )
    # Start background thread to capture output
    comfyui_reader_thread = threading.Thread(target=_read_comfyui_output, daemon=True)
    comfyui_reader_thread.start()
    # Wait for server to be ready
    start_time = time.time()
    last_status_time = start_time
@@ -600,13 +638,21 @@ def handler(job: dict) -> dict:
    except TimeoutError as e:
        logger.log(f"ERROR: Timeout - {str(e)}")
-        return {"error": str(e), "status": "timeout", "logs": logger.get_logs()}
+        comfyui_logs = get_comfyui_output(100)
        logger.log(f"ComfyUI output (last 100 lines):")
        for line in comfyui_logs:
            logger.log(f"  {line}")
        return {"error": str(e), "status": "timeout", "logs": logger.get_logs(), "comfyui_output": comfyui_logs}
    except Exception as e:
        import traceback
        tb = traceback.format_exc()
        logger.log(f"ERROR: {str(e)}")
        logger.log(f"Traceback:\n{tb}")
-        return {"error": str(e), "status": "error", "logs": logger.get_logs()}
+        comfyui_logs = get_comfyui_output(100)
        logger.log(f"ComfyUI output (last 100 lines):")
        for line in comfyui_logs:
            logger.log(f"  {line}")
        return {"error": str(e), "status": "error", "logs": logger.get_logs(), "comfyui_output": comfyui_logs}
 # RunPod serverless entry point