Reduce CUDA compilation parallelism for 16GB RAM

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-26 16:58:39 +13:00
parent 7a8b59f471
commit fef4b8d7ee
1 changed file with 3 additions and 3 deletions
--- a/6
+++ b/6
@@ -62,9 +62,9 @@ RUN pip install -r /tmp/requirements.txt && rm -rf /root/.cache/pip
 # Compile SageAttention 2.2 from source with no build isolation
 WORKDIR /tmp
-ENV EXT_PARALLEL=4
+ENV EXT_PARALLEL=2
-ENV NVCC_APPEND_FLAGS="--threads 8"
+ENV NVCC_APPEND_FLAGS="--threads 2"
-ENV MAX_JOBS=32
+ENV MAX_JOBS=4
 # Target RunPod GPU architectures: H100/H200 (9.0), datacenter Blackwell/B200 (10.0); NOTE: RTX 5090 is compute capability 12.0 and is not covered by this list
 ENV TORCH_CUDA_ARCH_LIST="9.0;10.0"
 RUN git clone https://github.com/thu-ml/SageAttention.git && \