From 9f71e6db577d934f81040909ae223a865f749f0f Mon Sep 17 00:00:00 2001
From: Debian
Date: Sat, 3 Jan 2026 21:57:09 +0000
Subject: [PATCH] Limit SageAttention to A100/H100 due to cross-compile issues
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The sm90 kernels use wgmma instructions that can't be compiled for
sm86/sm89 targets. Restricting to 8.0 (A100) and 9.0 (H100) only.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5
---
 Dockerfile | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index 9df6ab7..43bfe71 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -66,9 +66,9 @@ ENV EXT_PARALLEL=2
 ENV NVCC_APPEND_FLAGS="--threads 2"
 ENV MAX_JOBS=4
 # Target RunPod GPU architectures:
-# 8.0 = A100, 8.6 = A10/RTX 3090, 8.9 = RTX 4090/L40, 9.0 = H100/H200
-# Note: Blackwell (10.0) not yet supported by SageAttention
-ENV TORCH_CUDA_ARCH_LIST="8.0;8.6;8.9;9.0"
+# 8.0 = A100, 9.0 = H100/H200
+# Note: 8.6/8.9 excluded due to SageAttention sm90 kernel cross-compile issues
+ENV TORCH_CUDA_ARCH_LIST="8.0;9.0"
 RUN git clone https://github.com/thu-ml/SageAttention.git && \
     cd SageAttention && \
     git checkout 2aecfa8 && \