CI: restore already-merged changes that were accidentally reverted by PR bitsandbytes-foundation#949 without reason.

wkpark · wkpark · commit c4f0b37cdc69 · 2024-02-06T05:16:54.000+09:00
diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml
@@ -1,9 +1,9 @@
 name: Python package
 
 on:
-  push: {}
+  push:
+    branches: [ "main" ]
   pull_request:
-    branches: [ main ]
     paths:
       - '.github/workflows/python-package.yml'
       - 'bitsandbytes/**'
@@ -17,153 +17,231 @@ on:
       - 'pytest.ini'
       - '**/*.md'
   release:
+    branches: [ "main" ]
     types: [ published ]
 
+concurrency:
+  group: cmake-${{ github.ref }}
+  cancel-in-progress: true
+
 jobs:
 
   ##
   # This job matrix builds the non-CUDA versions of the libraries for all supported platforms.
   ##
   build-shared-libs:
     strategy:
+      # Set fail-fast to false to ensure that feedback is delivered for all matrix combinations. Consider changing this to true when your workflow is stable.
+      fail-fast: false
+
       matrix:
         os: [ubuntu-latest, macos-latest, windows-latest]
         arch: [x86_64, aarch64]
+        build_type: [Release]
         exclude:
           - os: windows-latest # This probably requires arm64 Windows agents
             arch: aarch64
     runs-on: ${{ matrix.os }} # One day, we could run them on native agents. Azure supports this now but it's planned only for Q3 2023 for hosted agents
     steps:
       # Check out code
     - uses: actions/checkout@v4
-      # On Linux we use CMake within Docker
-    - name: Setup cmake
-      uses: jwlawson/actions-setup-cmake@v1.14
-      with:
-        cmake-version: '3.26.x'
-    - name: Add msbuild to PATH
-      uses: microsoft/setup-msbuild@v1.1
-      if: ${{ startsWith(matrix.os, 'windows') }}
-      # Check out dependencies code
-    - uses: actions/checkout@v4
-      name: Check out NVidia cub
+
+    - name: Set up MSVC
+      if: matrix.os == 'windows-latest'
+      uses: ilammy/msvc-dev-cmd@v1.13.0
       with:
-        repository: nvidia/cub
-        ref: 1.11.0
-        path: dependencies/cub
-      # Compile C++ code
-    - name: Build C++
+        arch: amd64
+
+    - name: Set reusable strings
+      # Turn repeated input strings (such as the build output directory) into step outputs. These step outputs can be used throughout the workflow file.
+      id: strings
       shell: bash
       run: |
-        set -ex
-        build_os=${{ matrix.os }}
-        build_arch=${{ matrix.arch }}
-        if [ ${build_os:0:6} == ubuntu -a ${build_arch} == aarch64 ]; then
-          # Allow cross-compile om aarch64
-          sudo apt-get install -y gcc-aarch64-linux-gnu binutils-aarch64-linux-gnu
-        fi
-        if [ ${build_os:0:5} == macos -a ${build_arch} == aarch64 ]; then
-          cmake -DCMAKE_OSX_ARCHITECTURES=arm64 -DCOMPUTE_BACKEND=cpu .
-        else
-          cmake -DCOMPUTE_BACKEND=cpu .
-        fi
-        if [ ${build_os:0:7} == windows ]; then
-          pwsh -Command "msbuild bitsandbytes.vcxproj /property:Configuration=Release"
+        echo "build-output-dir=${{ github.workspace }}/build" >> "$GITHUB_OUTPUT"
+
+    - name: Prep build
+      run: python3 -m pip install cmake==3.27.9 ninja setuptools wheel
+
+    - name: Prep Compilers
+      shell: bash -el {0}
+      run: |
+        if [ "${{ matrix.os }}" = "windows-latest" ]; then
+            echo CXX_COMPILER=cl >> "$GITHUB_ENV"
         else
-          make
+            echo CXX_COMPILER=g++ >> "$GITHUB_ENV"
         fi
+
+
+    - name: Configure CPU
+      run: >
+        cmake -B ${{ steps.strings.outputs.build-output-dir }}
+        -G Ninja
+        -DCMAKE_CXX_COMPILER=${{ env.CXX_COMPILER }}
+        -DCMAKE_BUILD_TYPE=${{ matrix.build_type }}
+        -DCOMPUTE_BACKEND=cpu
+        -S ${{ github.workspace }}
+
+    - name: Build CPU
+      run: cmake --build ${{ steps.strings.outputs.build-output-dir }} --config ${{ matrix.build_type }}
+
+    - name: Copy libraries
+      shell: bash
+      run: |
         mkdir -p output/${{ matrix.os }}/${{ matrix.arch }}
         ( shopt -s nullglob && cp bitsandbytes/*.{so,dylib,dll} output/${{ matrix.os }}/${{ matrix.arch }}/ )
     - name: Upload build artifact
       uses: actions/upload-artifact@v4
       with:
-        name: shared_library_${{ matrix.os }}_${{ matrix.arch }}
+        name: shared_library-${{ matrix.os }}-${{ matrix.arch }}
         path: output/*
-        retention-days: 7
   ##
   # This job matrix builds the CUDA versions of the libraries for platforms that support CUDA (Linux x64/aarch64 + Windows x64)
   ##
   build-shared-libs-cuda:
     strategy:
+      # Set fail-fast to false to ensure that feedback is delivered for all matrix combinations. Consider changing this to true when your workflow is stable.
+      fail-fast: false
+
       matrix:
         os: [ubuntu-latest, windows-latest]
+        cuda-version: ['11.8', '12.1']
         arch: [x86_64, aarch64]
-        cuda_version: ['12.1.0']
+        build_type: [Release]
         exclude:
           - os: windows-latest # This probably requires arm64 Windows agents
             arch: aarch64
+
     runs-on: ${{ matrix.os }} # One day, we could run them on native agents. Azure supports this now but it's planned only for Q3 2023 for hosted agents
     steps:
       # Check out code
     - uses: actions/checkout@v4
-      # Linux: We use Docker to build cross platform Cuda (aarch64 is built in emulation)
-    - name: Set up Docker multiarch
-      if: startsWith(matrix.os, 'ubuntu')
-      uses: docker/setup-qemu-action@v2
-      # On Linux we use CMake within Docker
-    - name: Setup cmake
-      if: ${{ !startsWith(matrix.os, 'linux') }}
-      uses: jwlawson/actions-setup-cmake@v1.14
+    - name: Set up Python 3.10
+      uses: actions/setup-python@v5
       with:
-        cmake-version: '3.26.x'
-      # Windows: We install Cuda on the agent (slow)
-    - uses: Jimver/cuda-toolkit@v0.2.14
-      if: startsWith(matrix.os, 'windows')
-      id: cuda-toolkit
+        python-version: "3.10"
+
+    - name: Set up MSVC
+      if: matrix.os == 'windows-latest'
+      uses: ilammy/msvc-dev-cmd@v1.13.0
       with:
-        cuda: ${{ matrix.cuda_version }}
-        method: 'local'
-        # sub-packages: '["nvcc","cudart","nvrtc_dev","cublas_dev","cusparse_dev","visual_studio_integration"]'
-    - name: Add msbuild to PATH
-      uses: microsoft/setup-msbuild@v1.1
-      if: ${{ startsWith(matrix.os, 'windows') }}
-      # Check out dependencies code
-    - uses: actions/checkout@v4
-      name: Check out NVidia cub
+        arch: amd64
+
+    - name: Setup Mambaforge
+      uses: conda-incubator/setup-miniconda@v3.0.1
       with:
-        repository: nvidia/cub
-        ref: 1.11.0
-        path: dependencies/cub
-      # Compile C++ code
-    - name: Build C++
+        miniforge-variant: Mambaforge
+        miniforge-version: latest
+        activate-environment: bnb-env
+        use-mamba: true
+
+    - uses: conda-incubator/setup-miniconda@v3.0.1
+      with:
+        auto-update-conda: true
+        activate-environment: bnb-env
+        environment-file: environment-bnb.yml
+        use-only-tar-bz2: false
+        auto-activate-base: true
+        python-version: "3.10"
+        mamba-version: "*"
+
+    - name: Set reusable strings
+      # Turn repeated input strings (such as the build output directory) into step outputs. These step outputs can be used throughout the workflow file.
+      id: strings
+      shell: bash
+      run: |
+        echo "build-output-dir=${{ github.workspace }}/build" >> "$GITHUB_OUTPUT"
+
+    - name: CUDA Toolkit
+      shell: bash -el {0}
+      run: |
+        if [ "${{ matrix.os }}" = "ubuntu-latest" ]; then
+            # to prepare space
+            sudo rm -rf /usr/share/dotnet
+            sudo rm -rf /opt/ghc
+            sudo rm -rf /usr/local/share/boost
+        fi
+        addon=""
+        cuda_version=${{ matrix.cuda-version }}
+        [ "$cuda_version" = "12.1" ] && [ "${{ matrix.os }}" = "ubuntu-latest" ] && addon="cuda-cudart-static cuda-nvrtc"
+        [ "$cuda_version" = "12.1" ] && [ "${{ matrix.os }}" = "windows-latest" ] && addon="cuda-nvrtc"
+        [ "$cuda_version" = "11.8" ] && cuda_version="11.8.0"
+        [ "$cuda_version" = "12.1" ] && cuda_version="12.1.1"
+
+        conda install pytorch-cuda=${{ matrix.cuda-version }} -c pytorch # its dependencies are sometimes not resolved correctly
+        conda install cuda-python=${{ matrix.cuda-version }} cuda-libraries-dev cuda-nvcc cuda-nvtx cuda-cupti cuda-cudart cuda-cudart-dev cuda-runtime cuda-libraries $addon -c "nvidia/label/cuda-$cuda_version"
+
+        [ "${{ matrix.os }}" = "windows-latest" ] && conda install "clang>=17.0.6" "clangxx>=17.0.6" -c conda-forge
+
+        CUDA_HOME="${{ env.CONDA }}/envs/bnb-env"
+        echo CUDA_HOME=$CUDA_HOME >> "$GITHUB_ENV"
+        echo CUDA_PATH=$CUDA_HOME >> "$GITHUB_ENV"
+
+        if [ "${{ matrix.os }}" = "windows-latest" ]; then
+            echo CXX_COMPILER=cl >> "$GITHUB_ENV"
+            echo C_COMPILER=cl >> "$GITHUB_ENV"
+            # Without -DCMAKE_CUDA_COMPILER=nvcc, the CMake configure step always fails for CUDA 11.8
+            echo DCMAKE_CUDA_COMPILER=-DCMAKE_CUDA_COMPILER=nvcc >> "$GITHUB_ENV"
+        else
+            echo CXX_COMPILER=g++ >> "$GITHUB_ENV"
+            echo C_COMPILER=gcc >> "$GITHUB_ENV"
+        fi
+
+        nvcc --version
+
+    - name: Update environment
+      run: mamba env update -n bnb-env -f environment-bnb.yml
+
+    - name: Prep build
+      run: python -m pip install cmake==3.27.9 ninja setuptools wheel
+
+    # TODO: the following steps (CUDA, NOBLASLT, CPU) could be moved to the matrix, so they're built in parallel
+
+    - name: Configure CUDA
+      run: >
+        cmake -B ${{ steps.strings.outputs.build-output-dir }}
+        -G Ninja ${{ env.DCMAKE_CUDA_COMPILER }}
+        -DCMAKE_CXX_COMPILER=${{ env.CXX_COMPILER }}
+        -DCMAKE_BUILD_TYPE=${{ matrix.build_type }}
+        -DCOMPUTE_CAPABILITY="50;52;60;61;62;70;72;75;80;86;87;89;90"
+        -DCOMPUTE_BACKEND=cuda
+        -S ${{ github.workspace }}
+
+    - name: Build CUDA
+      run: cmake --build ${{ steps.strings.outputs.build-output-dir }} --config ${{ matrix.build_type }}
+
+    - name: Configure NOBLASLT
+      run: >
+        cmake -B ${{ steps.strings.outputs.build-output-dir }}
+        -G Ninja ${{ env.DCMAKE_CUDA_COMPILER }}
+        -DCMAKE_CXX_COMPILER=${{ env.CXX_COMPILER }}
+        -DCMAKE_BUILD_TYPE=${{ matrix.build_type }}
+        -DCOMPUTE_CAPABILITY="50;52;60;61;62;70;72;75;80;86;87;89;90"
+        -DNO_CUBLASLT=ON
+        -S ${{ github.workspace }}
+
+    - name: Build NOBLASLT
+      run: cmake --build ${{ steps.strings.outputs.build-output-dir }} --config ${{ matrix.build_type }}
+
+    - name: Copy libraries
       shell: bash
       run: |
-        set -ex
-        build_os=${{ matrix.os }}
-        build_arch=${{ matrix.arch }}
-        for NO_CUBLASLT in ON OFF; do
-          if [ ${build_os:0:6} == ubuntu ]; then
-            image=nvidia/cuda:${{ matrix.cuda_version }}-devel-ubuntu22.04
-            echo "Using image $image"
-            docker run --platform linux/$build_arch -i -w /src -v $PWD:/src $image sh -c \
-              "apt-get update \
-              && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends cmake \
-              && cmake -DCOMPUTE_BACKEND=cuda -DNO_CUBLASLT=${NO_CUBLASLT} . \
-              && make"
-          else
-            cmake -DCOMPUTE_BACKEND=cuda -DNO_CUBLASLT=${NO_CUBLASLT} .
-            pwsh -Command "msbuild bitsandbytes.vcxproj /property:Configuration=Release"
-          fi
-        done
         mkdir -p output/${{ matrix.os }}/${{ matrix.arch }}
         ( shopt -s nullglob && cp bitsandbytes/*.{so,dylib,dll} output/${{ matrix.os }}/${{ matrix.arch }}/ )
     - name: Upload build artifact
       uses: actions/upload-artifact@v4
       with:
-        name: shared_library_cuda_${{ matrix.os }}_${{ matrix.arch }}_${{ matrix.cuda_version }}
+        name: shared_library_cuda-${{ matrix.os }}-${{ matrix.cuda-version }}-${{ matrix.arch }}
         path: output/*
-        retention-days: 7
   build-wheels:
     needs:
     - build-shared-libs
     - build-shared-libs-cuda
     strategy:
       matrix:
         os: [ubuntu-latest, macos-latest, windows-latest]
-        python-version: ["3.9", "3.10", "3.11", "3.12"]
         arch: [x86_64, aarch64]
         exclude:
-          - os: windows-latest # This probably requires arm64 Windows agents
+          - os: windows-latest
             arch: aarch64
     runs-on: ${{ matrix.os }}
     steps:
@@ -174,18 +252,18 @@ jobs:
       uses: actions/download-artifact@v4
       with:
         merge-multiple: true
-        pattern: "shared_library*_${{ matrix.os }}_${{ matrix.arch }}*"
+        pattern: "shared_library*-${{ matrix.os }}-*"
         path: output/
     - name: Copy correct platform shared library
       shell: bash
       run: |
         ls -lR output/
         cp output/${{ matrix.os }}/${{ matrix.arch }}/* bitsandbytes/
       # Set up the Python version needed
-    - name: Set up Python ${{ matrix.python-version }}
+    - name: Set up Python 3.10
       uses: actions/setup-python@v5
       with:
-        python-version: ${{ matrix.python-version }}
+        python-version: "3.10"
         cache: pip
     - name: Install build package
       shell: bash
@@ -200,13 +278,13 @@ jobs:
     #    PYTHONPATH=. pytest --log-cli-level=DEBUG tests
     - name: Build wheel
       shell: bash
-      run: python -m build .
-    - name: Upload build artifact
+      run: python -m build . --wheel
+    - name: Upload Build Artifacts
       uses: actions/upload-artifact@v4
       with:
-        name: bdist_wheel_${{ matrix.os }}_${{ matrix.arch }}_${{ matrix.python-version }}
-        path: dist/bitsandbytes-*.whl
-        retention-days: 7
+        name: bdist_wheel-${{ matrix.os }}-${{ matrix.arch }}
+        path: |
+          ${{ github.workspace }}/dist/
   publish:
     needs: build-wheels
     runs-on: ubuntu-latest
@@ -217,7 +295,7 @@ jobs:
       with:
         path: dist/
         merge-multiple: true
-        pattern: "bdist_wheel_*"
+        pattern: "bdist_wheel-*"
     - run: |
         ls -lR dist/
     - name: Publish to PyPi