Add job logging and increase timeout to 20 minutes
All checks were successful
Build and Push Frontend Docker Image / build (push) Successful in 39s
Build and Push Docker Image / build (push) Successful in 31m7s

- Add JobLogger class to handler.py for structured timestamped logging
- Increase MAX_TIMEOUT from 600s to 1200s (20 minutes)
- Add logs column to generated_content table via migration
- Store and display job execution logs in gallery UI
- Add Logs button to gallery items with modal display

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
Debian
2026-01-08 02:10:55 +00:00
parent 52dd0d8766
commit 3c421cf7b8
10 changed files with 269 additions and 46 deletions
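For orientation before the per-file diffs: a minimal sketch of the payload the worker now returns and the backend persists. Field names are taken from handler.py and the TypeScript types below; the concrete values are illustrative only.

# Sketch: what status.output looks like once the updated handler finishes a job.
worker_output = {
    "status": "success",
    "prompt_id": "d1f0...",  # illustrative
    "outputs": [
        {"type": "video", "filename": "wan22_00001.mp4", "data": "<base64>", "size": 4833280},
    ],
    "logs": [
        "[2026-01-08 02:10:55] [0.0s] Job started",
        "[2026-01-08 02:11:02] [7.3s] ComfyUI server ready",
        "[2026-01-08 02:18:40] [465.1s] Workflow completed successfully",
    ],
}
# The Express backend stores worker_output["logs"] as JSON in generated_content.logs
# and serves it back through the new GET /:id/logs route on the content router.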

View File

@@ -766,3 +766,65 @@ body {
text-shadow: 0 1px 3px rgba(0,0,0,0.5);
pointer-events: none;
}
/* Logs Modal */
.logs-container {
background: var(--gray-100);
border-radius: var(--radius);
padding: 15px;
}
.logs-status {
margin-bottom: 12px;
font-size: 14px;
}
.logs-status .badge {
margin-left: 8px;
}
.badge-completed { background: var(--success); color: white; }
.badge-processing { background: var(--warning); color: var(--gray-900); }
.badge-pending { background: var(--gray-500); color: white; }
.badge-failed { background: var(--danger); color: white; }
.logs-error {
background: #f8d7da;
color: #721c24;
padding: 12px;
border-radius: var(--radius);
margin-bottom: 12px;
font-size: 13px;
}
.logs-content {
font-family: 'SF Mono', Monaco, 'Andale Mono', monospace;
font-size: 12px;
line-height: 1.6;
background: var(--gray-900);
color: #e0e0e0;
padding: 15px;
border-radius: var(--radius);
max-height: 400px;
overflow-y: auto;
white-space: pre-wrap;
word-break: break-word;
}
.logs-content::-webkit-scrollbar {
width: 8px;
}
.logs-content::-webkit-scrollbar-track {
background: var(--gray-800);
border-radius: 4px;
}
.logs-content::-webkit-scrollbar-thumb {
background: var(--gray-600);
border-radius: 4px;
}
.logs-content::-webkit-scrollbar-thumb:hover {
background: var(--gray-500);
}

View File

@@ -437,6 +437,7 @@ function renderGallery(container, items) {
<div class="gallery-item-meta">
<span>${formatDate(item.createdAt)}</span>
<div class="gallery-item-actions">
<button class="btn btn-sm btn-secondary view-logs-btn" data-content-id="${item.id}">Logs</button>
${item.status === 'completed' ? `<a href="/api/content/${item.id}/download" class="btn btn-sm btn-secondary">Download</a>` : ''}
<button class="btn btn-sm btn-danger delete-content-btn" data-content-id="${item.id}">Delete</button>
</div>
@@ -470,6 +471,48 @@ function renderGallery(container, items) {
}
});
});
container.querySelectorAll('.view-logs-btn').forEach(btn => {
btn.addEventListener('click', async function() {
const contentId = parseInt(this.dataset.contentId, 10);
await viewContentLogs(contentId);
});
});
}
async function viewContentLogs(contentId) {
try {
const data = await api(`/content/${contentId}/logs`);
const logs = data.logs || [];
const errorMessage = data.errorMessage;
let logsHtml = '';
if (logs.length === 0) {
logsHtml = '<p style="color:var(--gray-500);text-align:center;">No logs available</p>';
} else {
logsHtml = `<div class="logs-content">${logs.map(log => escapeHtml(log)).join('\n')}</div>`;
}
if (errorMessage) {
logsHtml = `<div class="logs-error"><strong>Error:</strong> ${escapeHtml(errorMessage)}</div>` + logsHtml;
}
showModal(`
<div class="modal-header">
<h3>Job Logs</h3>
<button class="modal-close">&times;</button>
</div>
<div class="logs-container">
<div class="logs-status">Status: <span class="badge badge-${data.status}">${data.status}</span></div>
${logsHtml}
</div>
<div class="modal-footer">
<button class="btn btn-secondary modal-cancel-btn">Close</button>
</div>
`);
} catch (error) {
alert('Failed to load logs: ' + error.message);
}
}
// Admin

View File

@@ -50,6 +50,7 @@ function runMigrations(database: Database.Database): void {
// Migration files in order
const migrations = [
{ version: 1, file: '001_initial.sql' },
{ version: 2, file: '002_add_logs.sql' },
];
for (const migration of migrations) {
@@ -131,5 +132,6 @@ export interface GeneratedContentRow {
mime_type: string;
status: 'pending' | 'processing' | 'completed' | 'failed';
error_message: string | null;
logs: string | null;
created_at: string;
}

View File

@@ -0,0 +1,2 @@
-- Add logs column to store job execution logs
ALTER TABLE generated_content ADD COLUMN logs TEXT;
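The new column stores the log array serialized as a JSON string (the repository layer further below does JSON.stringify on write and JSON.parse on read). A minimal round-trip illustration, using Python's sqlite3 purely as a sketch — the application itself goes through better-sqlite3 in TypeScript:

import json
import sqlite3

conn = sqlite3.connect(":memory:")
conn.execute("CREATE TABLE generated_content (id INTEGER PRIMARY KEY, logs TEXT)")

logs = ["[2026-01-08 02:10:55] [0.0s] Job started"]
conn.execute("INSERT INTO generated_content (id, logs) VALUES (?, ?)", (1, json.dumps(logs)))

row = conn.execute("SELECT logs FROM generated_content WHERE id = 1").fetchone()
print(json.loads(row[0]) if row[0] else None)  # None while the column is still NULL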

View File

@@ -103,6 +103,35 @@ router.get('/:id', (req, res) => {
});
});
// Get content logs
router.get('/:id/logs', (req, res) => {
const authReq = req as AuthenticatedRequest;
const contentId = parseInt(req.params.id, 10);
if (isNaN(contentId)) {
res.status(400).json({ error: 'Invalid content ID' });
return;
}
const content = getContentById(contentId);
if (!content) {
res.status(404).json({ error: 'Content not found' });
return;
}
if (!canAccessResource(authReq.user, content.userId)) {
res.status(403).json({ error: 'Access denied' });
return;
}
res.json({
contentId: content.id,
status: content.status,
logs: content.logs || [],
errorMessage: content.errorMessage,
});
});
// Download content file
router.get('/:id/download', (req, res) => {
const authReq = req as AuthenticatedRequest;
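A quick manual check of the new logs endpoint. The /api/content prefix and cookie-based session are assumptions inferred from the frontend's api('/content/.../logs') call and the download link above, so adjust to the actual deployment:

import requests

resp = requests.get(
    "http://localhost:3000/api/content/42/logs",  # hypothetical host, port and content id
    cookies={"session": "<session cookie>"},      # hypothetical auth; must pass canAccessResource
)
resp.raise_for_status()
body = resp.json()
print(body["status"], body["errorMessage"])
for line in body["logs"]:
    print(line)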

View File

@@ -123,9 +123,16 @@ router.get('/:jobId/status', asyncHandler(async (req, res) => {
if (output.data) {
// Save base64 data to file
saveContentFile(row.id, output.data);
// Also save logs if present
if (status.output.logs) {
updateContentStatus(row.id, 'completed', { logs: status.output.logs });
}
} else if (output.path) {
-// File was saved to volume - update status
-updateContentStatus(row.id, 'completed', { fileSize: output.size });
// File was saved to volume - update status with logs
updateContentStatus(row.id, 'completed', {
fileSize: output.size,
logs: status.output.logs,
});
}
}
} else if (status.status === 'FAILED') {
@@ -138,6 +145,7 @@ router.get('/:jobId/status', asyncHandler(async (req, res) => {
if (row) {
updateContentStatus(row.id, 'failed', {
errorMessage: status.error || status.output?.error || 'Unknown error',
logs: status.output?.logs,
});
}
}
@@ -182,6 +190,7 @@ router.get('/content/:contentId/status', (req, res) => {
status: content.status,
runpodJobId: content.runpodJobId,
errorMessage: content.errorMessage,
logs: content.logs,
});
});

View File

@@ -23,6 +23,7 @@ function rowToContent(row: GeneratedContentRow): GeneratedContent {
mimeType: row.mime_type,
status: row.status,
errorMessage: row.error_message,
logs: row.logs ? JSON.parse(row.logs) : null,
createdAt: new Date(row.created_at),
};
}
@@ -68,6 +69,7 @@ export function updateContentStatus(
runpodJobId?: string;
fileSize?: number;
errorMessage?: string;
logs?: string[];
}
): GeneratedContent | null {
const db = getDb();
@@ -87,6 +89,10 @@ export function updateContentStatus(
setParts.push('error_message = ?');
values.push(updates.errorMessage);
}
if (updates?.logs !== undefined) {
setParts.push('logs = ?');
values.push(JSON.stringify(updates.logs));
}
values.push(id);

View File

@@ -58,13 +58,21 @@ async function processStuckJobs(): Promise<void> {
const output = status.output.outputs[0];
if (output.data) {
saveContentFile(job.id, output.data);
// Also save logs if present
if (status.output.logs) {
updateContentStatus(job.id, 'completed', { logs: status.output.logs });
}
} else {
-updateContentStatus(job.id, 'completed', { fileSize: output.size });
updateContentStatus(job.id, 'completed', {
fileSize: output.size,
logs: status.output.logs,
});
}
logger.info({ contentId: job.id }, 'Background processor completed job');
} else if (status.status === 'FAILED') {
updateContentStatus(job.id, 'failed', {
errorMessage: status.error || status.output?.error || 'Job failed',
logs: status.output?.logs,
});
logger.info({ contentId: job.id }, 'Background processor marked job as failed');
}

View File

@@ -68,6 +68,7 @@ export interface GeneratedContent {
mimeType: string;
status: 'pending' | 'processing' | 'completed' | 'failed';
errorMessage: string | null;
logs: string[] | null;
createdAt: Date;
}
@@ -99,6 +100,7 @@ export interface RunPodJobStatus extends RunPodJob {
size?: number;
}>;
error?: string;
logs?: string[];
};
error?: string;
}

View File

@@ -28,7 +28,7 @@ import runpod
COMFYUI_DIR = "/workspace/ComfyUI"
COMFYUI_PORT = 8188
COMFYUI_HOST = f"http://127.0.0.1:{COMFYUI_PORT}"
-MAX_TIMEOUT = 600 # 10 minutes max for video generation
MAX_TIMEOUT = 1200 # 20 minutes max for video generation
POLL_INTERVAL = 1.0
STARTUP_TIMEOUT = 120
DEFAULT_WORKFLOW_PATH = "/workspace/workflows/Wan22-I2V-Remix-API.json"
@@ -46,14 +46,41 @@ NODE_SAVE_VIDEO = "117"
comfyui_process = None
-def start_comfyui():
class JobLogger:
"""Accumulates log messages with timestamps for returning to the client."""
def __init__(self):
self.logs = []
self.start_time = time.time()
def log(self, message: str):
"""Log a message with timestamp."""
timestamp = time.strftime("%Y-%m-%d %H:%M:%S")
elapsed = time.time() - self.start_time
entry = f"[{timestamp}] [{elapsed:.1f}s] {message}"
self.logs.append(entry)
print(entry) # Also print for RunPod console
def get_logs(self) -> list:
"""Return all accumulated logs."""
return self.logs
def start_comfyui(logger: JobLogger = None):
"""Start ComfyUI server if not already running."""
global comfyui_process
def log(msg):
if logger:
logger.log(msg)
else:
print(msg)
if comfyui_process is not None and comfyui_process.poll() is None:
log("ComfyUI server already running")
return True
-print("Starting ComfyUI server...")
log("Starting ComfyUI server...")
comfyui_process = subprocess.Popen(
[
@@ -70,17 +97,25 @@ def start_comfyui():
# Wait for server to be ready
start_time = time.time()
last_status_time = start_time
while time.time() - start_time < STARTUP_TIMEOUT:
try:
resp = requests.get(f"{COMFYUI_HOST}/system_stats", timeout=2)
if resp.status_code == 200:
-print("ComfyUI server ready")
log("ComfyUI server ready")
return True
except requests.exceptions.RequestException:
pass
# Log progress every 15 seconds
if time.time() - last_status_time >= 15:
elapsed = int(time.time() - start_time)
log(f"Waiting for ComfyUI startup... ({elapsed}s)")
last_status_time = time.time()
time.sleep(1)
-print("ComfyUI server failed to start")
log(f"ComfyUI server failed to start after {STARTUP_TIMEOUT}s")
return False
@@ -305,9 +340,16 @@ def get_history(prompt_id: str) -> dict:
return resp.json()
-def poll_for_completion(prompt_id: str, timeout: int = MAX_TIMEOUT) -> dict:
def poll_for_completion(prompt_id: str, timeout: int = MAX_TIMEOUT, logger: JobLogger = None) -> dict:
"""Poll until workflow completes or timeout."""
start_time = time.time()
last_log_time = start_time
def log(msg):
if logger:
logger.log(msg)
else:
print(msg)
while time.time() - start_time < timeout:
history = get_history(prompt_id)
@@ -323,6 +365,13 @@ def poll_for_completion(prompt_id: str, timeout: int = MAX_TIMEOUT) -> dict:
if status.get("status_str") == "error":
raise Exception(f"Workflow execution failed: {status}")
# Log progress every 30 seconds
if time.time() - last_log_time >= 30:
elapsed = int(time.time() - start_time)
remaining = timeout - elapsed
log(f"Generating... ({elapsed}s elapsed, {remaining}s remaining)")
last_log_time = time.time()
time.sleep(POLL_INTERVAL)
raise TimeoutError(f"Workflow execution timed out after {timeout}s")
@@ -419,37 +468,55 @@ def handler(job: dict) -> dict:
"resolution": 720 (optional, default 720),
"steps": 8 (optional, default 8),
"split_step": 4 (optional, default 4),
-"timeout": 600 (optional, max 600),
"timeout": 1200 (optional, max 1200),
"workflow": {} (optional, override default workflow)
}
"""
logger = JobLogger()
job_input = job.get("input", {})
logger.log("Job started")
# Validate required inputs
if "image" not in job_input or not job_input["image"]:
-return {"error": "Missing required 'image' (base64) in input"}
logger.log("ERROR: Missing required 'image' (base64) in input")
return {"error": "Missing required 'image' (base64) in input", "logs": logger.get_logs()}
if "prompt" not in job_input or not job_input["prompt"]:
-return {"error": "Missing required 'prompt' in input"}
logger.log("ERROR: Missing required 'prompt' in input")
return {"error": "Missing required 'prompt' in input", "logs": logger.get_logs()}
# Log input parameters
image_size_kb = len(job_input["image"]) * 3 / 4 / 1024 # Approximate decoded size
logger.log(f"Input image size: ~{image_size_kb:.1f} KB (base64)")
logger.log(f"Prompt: {job_input['prompt'][:100]}{'...' if len(job_input['prompt']) > 100 else ''}")
if job_input.get("negative_prompt"):
logger.log(f"Negative prompt: {job_input['negative_prompt'][:50]}...")
logger.log(f"Resolution: {job_input.get('resolution', 720)}")
logger.log(f"Steps: {job_input.get('steps', 8)}")
logger.log(f"Split step: {job_input.get('split_step', 4)}")
# Ensure ComfyUI is running
-if not start_comfyui():
-return {"error": "Failed to start ComfyUI server"}
if not start_comfyui(logger):
logger.log("ERROR: Failed to start ComfyUI server")
return {"error": "Failed to start ComfyUI server", "logs": logger.get_logs()}
try:
# Load workflow (custom or default)
if "workflow" in job_input and job_input["workflow"]:
logger.log("Loading custom workflow")
workflow = job_input["workflow"]
-# Convert frontend format if needed
workflow = convert_frontend_to_api(workflow)
else:
-# Load and convert default workflow
logger.log("Loading default workflow")
frontend_workflow = load_default_workflow()
workflow = convert_frontend_to_api(frontend_workflow)
logger.log(f"Workflow loaded with {len(workflow)} nodes")
# Upload image
image_filename = upload_image(job_input["image"])
-print(f"Uploaded image: {image_filename}")
logger.log(f"Uploaded image: {image_filename}")
# Build params for injection
params = {
@@ -471,48 +538,34 @@ def handler(job: dict) -> dict:
# Inject parameters into workflow
workflow = inject_wan22_params(workflow, params)
logger.log("Parameters injected into workflow")
-# Debug: print output chain nodes to verify connections
-print("=== Workflow Output Chain ===")
-# Check the output chain: 117 <- 116 <- 115 <- 158 <- 140
-for node_id in ["117", "116", "115"]:
-if node_id in workflow:
-node = workflow[node_id]
-print(f"Node {node_id} ({node['class_type']}): {node['inputs']}")
-else:
-print(f"Node {node_id}: MISSING FROM WORKFLOW!")
-print(f"Total nodes in workflow: {len(workflow)}")
# Queue workflow
client_id = uuid.uuid4().hex
prompt_id = queue_workflow(workflow, client_id)
-print(f"Queued workflow: {prompt_id}")
logger.log(f"Workflow queued: {prompt_id}")
# Poll for completion
timeout = min(job_input.get("timeout", MAX_TIMEOUT), MAX_TIMEOUT)
-history = poll_for_completion(prompt_id, timeout)
-print("Workflow completed")
logger.log(f"Starting generation (timeout: {timeout}s)")
history = poll_for_completion(prompt_id, timeout, logger)
logger.log("Workflow completed successfully")
-# Debug: print history structure
-print(f"History keys: {history.keys()}")
-if "outputs" in history:
-print(f"Output nodes: {list(history['outputs'].keys())}")
-for node_id, node_out in history["outputs"].items():
-print(f" Node {node_id}: {list(node_out.keys())}")
-if "status" in history:
-print(f"Status: {history['status']}")
# Get output files
outputs = get_output_files(history)
if not outputs:
-return {"error": "No outputs generated"}
logger.log("ERROR: No outputs generated")
return {"error": "No outputs generated", "logs": logger.get_logs()}
logger.log(f"Found {len(outputs)} output file(s)")
# Fetch and encode outputs
results = []
for output_info in outputs:
data = fetch_output(output_info)
-print(f"Fetched output: {output_info['filename']} ({len(data)} bytes)")
size_mb = len(data) / 1024 / 1024
logger.log(f"Fetched output: {output_info['filename']} ({size_mb:.2f} MB)")
# Check size for video files
if output_info["type"] == "video" and len(data) > 10 * 1024 * 1024:
@@ -520,6 +573,7 @@ def handler(job: dict) -> dict:
output_path = Path("/runpod-volume/outputs") / output_info["filename"]
output_path.parent.mkdir(parents=True, exist_ok=True)
output_path.write_bytes(data)
logger.log(f"Large video saved to volume: {output_path}")
results.append({
"type": output_info["type"],
"filename": output_info["filename"],
@@ -535,18 +589,24 @@ def handler(job: dict) -> dict:
"size": len(data)
})
logger.log("Job completed successfully")
return {
"status": "success",
"prompt_id": prompt_id,
-"outputs": results
"outputs": results,
"logs": logger.get_logs()
}
except TimeoutError as e:
-return {"error": str(e), "status": "timeout"}
logger.log(f"ERROR: Timeout - {str(e)}")
return {"error": str(e), "status": "timeout", "logs": logger.get_logs()}
except Exception as e:
import traceback
-traceback.print_exc()
-return {"error": str(e), "status": "error"}
tb = traceback.format_exc()
logger.log(f"ERROR: {str(e)}")
logger.log(f"Traceback:\n{tb}")
return {"error": str(e), "status": "error", "logs": logger.get_logs()}
# RunPod serverless entry point
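Finally, a standalone sanity check for the JobLogger introduced in this commit (assumes handler.py is importable on the worker image; the timings in the comment are illustrative):

import time
from handler import JobLogger  # assumption: handler.py is on PYTHONPATH

logger = JobLogger()
logger.log("Job started")
time.sleep(0.5)
logger.log("ComfyUI server ready")

for entry in logger.get_logs():
    print(entry)
# e.g. [2026-01-08 02:10:55] [0.0s] Job started
#      [2026-01-08 02:10:56] [0.5s] ComfyUI server ready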