From fef4b8d7eec058eb8b7f11743a9218739e2b0a07 Mon Sep 17 00:00:00 2001
From: Nick <git@flybynight.io>
Date: Fri, 26 Dec 2025 16:58:39 +1300
Subject: [PATCH] Reduce CUDA compilation parallelism for 16GB RAM
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

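Lower the parallelism settings used when compiling SageAttention from
source, targeting build machines with 16GB of RAM:

  EXT_PARALLEL              4 -> 2
  NVCC_APPEND_FLAGS threads 8 -> 2
  MAX_JOBS                  32 -> 4

🤖 Generated with [Claude Code](https://claude.com/claude-code)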

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 Dockerfile | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index 4a01b13..f407de0 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -62,9 +62,9 @@ RUN pip install -r /tmp/requirements.txt && rm -rf /root/.cache/pip
 
 # Compile SageAttention 2.2 from source with no build isolation
 WORKDIR /tmp
-ENV EXT_PARALLEL=4
-ENV NVCC_APPEND_FLAGS="--threads 8"
-ENV MAX_JOBS=32
+ENV EXT_PARALLEL=2
+ENV NVCC_APPEND_FLAGS="--threads 2"
+ENV MAX_JOBS=4
 # Target RunPod GPU architectures: H100/H200(9.0), RTX 5090/Blackwell(10.0)
 ENV TORCH_CUDA_ARCH_LIST="9.0;10.0"
 RUN git clone https://github.com/thu-ml/SageAttention.git && \