Speed up the SageAttention source build and make it reproducible.

Changes in this patch:
  * Raise build parallelism: EXT_PARALLEL 1 -> 2, nvcc "--threads" 1 -> 2,
    MAX_JOBS 2 -> 4.
  * Check out commit 2aecfa8 after cloning SageAttention, so the image is
    built from a fixed revision rather than whatever the default branch
    currently points at.

NOTE(review): this patch was recovered from a copy whose line breaks had been
collapsed. The blank leading/trailing context lines and the 4-space indent of
the RUN continuation lines are reconstructed (the hunk header requires 15 old
and 16 new lines) -- confirm against the target Dockerfile, or apply with
whitespace tolerance if the context does not match.

diff --git a/Dockerfile b/Dockerfile
index 76f7080..9df6ab7 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -62,15 +62,16 @@ RUN pip install -r /tmp/requirements.txt && rm -rf /root/.cache/pip
 
 # Compile SageAttention 2.2 from source with no build isolation
 WORKDIR /tmp
-ENV EXT_PARALLEL=1
-ENV NVCC_APPEND_FLAGS="--threads 1"
-ENV MAX_JOBS=2
+ENV EXT_PARALLEL=2
+ENV NVCC_APPEND_FLAGS="--threads 2"
+ENV MAX_JOBS=4
 # Target RunPod GPU architectures:
 # 8.0 = A100, 8.6 = A10/RTX 3090, 8.9 = RTX 4090/L40, 9.0 = H100/H200
 # Note: Blackwell (10.0) not yet supported by SageAttention
 ENV TORCH_CUDA_ARCH_LIST="8.0;8.6;8.9;9.0"
 RUN git clone https://github.com/thu-ml/SageAttention.git && \
     cd SageAttention && \
+    git checkout 2aecfa8 && \
     pip install --no-build-isolation . && \
     cd / && rm -rf /tmp/SageAttention /root/.cache/pip
 