pytorch
diff --git a/‎.ci/docker/build.sh‎
Lines changed: 9 additions & 0 deletions b/‎.ci/docker/build.sh‎
Lines changed: 9 additions & 0 deletions
diff --git a/‎.ci/docker/ci_commit_pins/optimum-executorch.txt‎
Lines changed: 1 addition & 1 deletion b/‎.ci/docker/ci_commit_pins/optimum-executorch.txt‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎.ci/docker/common/install_cuda.sh‎
Lines changed: 57 additions & 0 deletions b/‎.ci/docker/common/install_cuda.sh‎
Lines changed: 57 additions & 0 deletions
diff --git a/‎.ci/docker/common/install_cuda_windows_cross_compile.sh‎
Lines changed: 149 additions & 0 deletions b/‎.ci/docker/common/install_cuda_windows_cross_compile.sh‎
Lines changed: 149 additions & 0 deletions
diff --git a/‎.ci/docker/common/install_zephyr.sh‎
Lines changed: 8 additions & 0 deletions b/‎.ci/docker/common/install_zephyr.sh‎
Lines changed: 8 additions & 0 deletions
diff --git a/‎.ci/docker/ubuntu/Dockerfile‎
Lines changed: 18 additions & 0 deletions b/‎.ci/docker/ubuntu/Dockerfile‎
Lines changed: 18 additions & 0 deletions
diff --git a/‎.ci/scripts/export_model_artifact.sh‎
Lines changed: 15 additions & 10 deletions b/‎.ci/scripts/export_model_artifact.sh‎
Lines changed: 15 additions & 10 deletions
diff --git a/‎.ci/scripts/setup-arm-baremetal-tools.sh‎
Lines changed: 8 additions & 3 deletions b/‎.ci/scripts/setup-arm-baremetal-tools.sh‎
Lines changed: 8 additions & 3 deletions
@@ -67,6 +67,13 @@ case "${IMAGE_NAME}" in
     # From https://developer.android.com/ndk/downloads
     ANDROID_NDK_VERSION=r28c
     ;;
+  executorch-ubuntu-22.04-cuda-windows)
+    LINTRUNNER=""
+    GCC_VERSION=11
+    CUDA_WINDOWS_CROSS_COMPILE=yes
+    CUDA_VERSION=12.8
+    SKIP_PYTORCH=yes
+    ;;
   *)
     echo "Invalid image name ${IMAGE_NAME}"
     exit 1
@@ -101,6 +108,8 @@ docker build \
   --build-arg "MEDIATEK_SDK=${MEDIATEK_SDK:-}" \
   --build-arg "ANDROID_NDK_VERSION=${ANDROID_NDK_VERSION:-}" \
   --build-arg "SKIP_PYTORCH=${SKIP_PYTORCH:-}" \
+  --build-arg "CUDA_WINDOWS_CROSS_COMPILE=${CUDA_WINDOWS_CROSS_COMPILE:-}" \
+  --build-arg "CUDA_VERSION=${CUDA_VERSION:-}" \
   -f "${OS}"/Dockerfile \
   "$@" \
   .
@@ -1 +1 @@
-732b11313b2006b4d8649500eaf5567ec6ac1e49
+f8aa919593cc51301ade73a2ee5491582521ab80
@@ -0,0 +1,57 @@
+#!/bin/bash
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+# Install Linux CUDA toolkit
+# This installs nvcc and other CUDA development tools needed for compiling CUDA code
+
+set -ex
+
+# CUDA version must be specified (e.g., 12.8)
+CUDA_VERSION="${CUDA_VERSION:?CUDA_VERSION must be set}"
+
+# Convert version format (e.g., 12.8 -> 12-8 for package names)
+CUDA_VERSION_DASH=$(echo "${CUDA_VERSION}" | tr '.' '-')
+
+# Add NVIDIA package repository
+apt-get update
+apt-get install -y --no-install-recommends \
+    gnupg2 \
+    ca-certificates \
+    wget
+
+# Download and install the CUDA keyring
+wget -q "https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb" -O /tmp/cuda-keyring.deb
+dpkg -i /tmp/cuda-keyring.deb
+rm /tmp/cuda-keyring.deb
+
+apt-get update
+
+# Install CUDA toolkit (nvcc and development libraries)
+# We install a minimal set of packages needed for compilation:
+# - cuda-nvcc: The CUDA compiler
+# - cuda-cudart-dev: CUDA runtime development files
+# - cuda-nvrtc-dev: CUDA runtime compilation library
+# - libcublas-dev: cuBLAS development files
+# - libcusparse-dev: cuSPARSE development files
+# - libcufft-dev: cuFFT development files
+apt-get install -y --no-install-recommends \
+    "cuda-nvcc-${CUDA_VERSION_DASH}" \
+    "cuda-cudart-dev-${CUDA_VERSION_DASH}" \
+    "cuda-nvrtc-dev-${CUDA_VERSION_DASH}" \
+    "libcublas-dev-${CUDA_VERSION_DASH}" \
+    "libcusparse-dev-${CUDA_VERSION_DASH}" \
+    "libcufft-dev-${CUDA_VERSION_DASH}"
+
+# Clean up
+apt-get clean
+rm -rf /var/lib/apt/lists/*
+
+# Verify installation
+/usr/local/cuda-${CUDA_VERSION}/bin/nvcc --version
+
+echo "CUDA ${CUDA_VERSION} toolkit installation complete"
+echo "CUDA_HOME=/usr/local/cuda-${CUDA_VERSION}"
@@ -0,0 +1,149 @@
+#!/bin/bash
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+# Sets up the mingw-w64 cross-compiler and a Windows CUDA toolkit so that
+# Windows CUDA binaries can be cross-compiled from this Linux environment.
+
+set -ex
+
+# Where the Windows CUDA toolkit is placed; WINDOWS_CUDA_INSTALL_DIR overrides
+# the default location.
+INSTALL_DIR="${WINDOWS_CUDA_INSTALL_DIR:-/opt/cuda-windows}"
+
+# Lookup table for the Windows installers, keyed by CUDA major.minor version.
+# Each value has the form "<full toolkit version>:<driver version>"; both parts
+# appear in the installer file name.
+# Source: https://developer.nvidia.com/cuda-toolkit-archive
+declare -A CUDA_DRIVER_MAP
+CUDA_DRIVER_MAP["12.6"]="12.6.3:561.17"
+CUDA_DRIVER_MAP["12.8"]="12.8.1:572.61"
+CUDA_DRIVER_MAP["12.9"]="12.9.1:576.57"
+
+install_mingw() {
+    echo "Installing mingw-w64 cross-compiler..."
+
+    apt-get update
+    # Install the POSIX threads version of mingw-w64 which supports C++11 threading
+    # primitives (std::mutex, std::condition_variable, std::shared_mutex).
+    # The default win32 threads version does not support these.
+    apt-get install -y --no-install-recommends \
+        g++-mingw-w64-x86-64-posix \
+        mingw-w64-tools \
+        p7zip-full \
+        wget
+
+    # Verify installation shows POSIX threads
+    x86_64-w64-mingw32-g++ --version
+
+    # Cleanup
+    apt-get clean
+    rm -rf /var/lib/apt/lists/*
+
+    echo "mingw-w64 installation complete (POSIX threads version)"
+}
+
+get_torch_cuda_version() {
+    # Query PyTorch for its CUDA version using conda environment
+    conda run -n "py_${PYTHON_VERSION}" python3 -c "import torch; print(torch.version.cuda)" 2>/dev/null || echo ""
+}
+
+install_windows_cuda() {
+    # Downloads the Windows CUDA installer that matches the CUDA version of the
+    # installed PyTorch and unpacks it under "${INSTALL_DIR}/extracted".
+    #
+    # Globals read: INSTALL_DIR, CUDA_DRIVER_MAP
+    # Calls:        get_torch_cuda_version
+    # Exits with status 1 when the PyTorch CUDA version cannot be detected, is
+    # not listed in CUDA_DRIVER_MAP, or the download/extraction fails.
+    #
+    # All working variables are local, so the caller's CUDA_VERSION is left
+    # untouched, and absolute paths are used so the working directory of the
+    # script does not change.
+    local torch_cuda_version cuda_major_minor cuda_info
+    local cuda_version cuda_driver_version
+    local installer_name installer_path installer_url
+    local extract_dir="${INSTALL_DIR}/extracted"
+    local staging_dir="${INSTALL_DIR}/extracted.partial"
+
+    # Get CUDA version from torch
+    torch_cuda_version=$(get_torch_cuda_version)
+
+    if [ -z "${torch_cuda_version}" ] || [ "${torch_cuda_version}" = "None" ]; then
+        echo "ERROR: Could not detect CUDA version from PyTorch." >&2
+        echo "Make sure PyTorch with CUDA support is installed before running this script." >&2
+        exit 1
+    fi
+
+    echo "Detected PyTorch CUDA version: ${torch_cuda_version}"
+
+    # Extract major.minor version (e.g., "12.8" from "12.8.1" or "12.8")
+    cuda_major_minor=$(echo "${torch_cuda_version}" | cut -d. -f1,2)
+
+    # Look up the full version and driver version
+    cuda_info="${CUDA_DRIVER_MAP[${cuda_major_minor}]:-}"
+    if [ -z "${cuda_info}" ]; then
+        echo "ERROR: CUDA version ${cuda_major_minor} is not in the known version map." >&2
+        echo "Known versions: ${!CUDA_DRIVER_MAP[*]}" >&2
+        exit 1
+    fi
+
+    # Map values have the form "<full version>:<driver version>".
+    cuda_version="${cuda_info%%:*}"
+    cuda_driver_version="${cuda_info#*:}"
+
+    echo "Using CUDA ${cuda_version} with driver ${cuda_driver_version}"
+
+    echo "Installing Windows CUDA toolkit ${cuda_version}..."
+
+    mkdir -p "${INSTALL_DIR}"
+
+    installer_name="cuda_${cuda_version}_${cuda_driver_version}_windows.exe"
+    installer_path="${INSTALL_DIR}/${installer_name}"
+    installer_url="https://developer.download.nvidia.com/compute/cuda/${cuda_version}/local_installers/${installer_name}"
+
+    # Check if already downloaded and extracted
+    if [ -d "${extract_dir}/cuda_cudart" ]; then
+        echo "Windows CUDA toolkit already installed, skipping download..."
+        return 0
+    fi
+
+    echo "Downloading CUDA installer from ${installer_url}..."
+    if ! wget -q "${installer_url}" -O "${installer_path}"; then
+        # Do not leave a truncated installer behind.
+        rm -f "${installer_path}"
+        echo "ERROR: Failed to download ${installer_url}" >&2
+        exit 1
+    fi
+
+    echo "Extracting CUDA toolkit..."
+    # Unpack into a staging directory and move it into place only after 7z
+    # succeeds, so an interrupted extraction can never satisfy the
+    # "already installed" check above on a later run.
+    rm -rf "${staging_dir}"
+    if ! 7z x "${installer_path}" -o"${staging_dir}" -y; then
+        rm -rf "${staging_dir}"
+        rm -f "${installer_path}"
+        echo "ERROR: Failed to extract ${installer_name}" >&2
+        exit 1
+    fi
+    rm -rf "${extract_dir}"
+    mv "${staging_dir}" "${extract_dir}"
+
+    # Fix permissions so ci-user can access the files
+    chmod -R a+rX "${INSTALL_DIR}"
+
+    # Clean up installer to save space
+    rm -f "${installer_path}"
+
+    echo "Windows CUDA toolkit installation complete"
+    echo "WINDOWS_CUDA_HOME=${INSTALL_DIR}/extracted/cuda_cudart/cudart"
+}
+
+# Parse command line arguments
+INSTALL_MINGW=false
+INSTALL_CUDA=false
+
+while [[ $# -gt 0 ]]; do
+    case $1 in
+        --mingw)
+            INSTALL_MINGW=true
+            shift
+            ;;
+        --cuda)
+            INSTALL_CUDA=true
+            shift
+            ;;
+        --all)
+            INSTALL_MINGW=true
+            INSTALL_CUDA=true
+            shift
+            ;;
+        *)
+            echo "Unknown option: $1"
+            echo "Usage: $0 [--mingw] [--cuda] [--all]"
+            exit 1
+            ;;
+    esac
+done
+
+# Default to installing everything if no options specified
+if [ "${INSTALL_MINGW}" = false ] && [ "${INSTALL_CUDA}" = false ]; then
+    INSTALL_MINGW=true
+    INSTALL_CUDA=true
+fi
+
+if [ "${INSTALL_MINGW}" = true ]; then
+    install_mingw
+fi
+
+if [ "${INSTALL_CUDA}" = true ]; then
+    install_windows_cuda
+fi
+
+echo "Installation complete"
@@ -2,6 +2,7 @@
 #!/bin/bash
 # Copyright (c) Meta Platforms, Inc. and affiliates.
 # All rights reserved.
+# Copyright 2026 Arm Limited and/or its affiliates.
 #
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
@@ -82,6 +83,13 @@ install_prerequiresites() {
         rm -f kitware-archive.sh
     pip_install --no-cache-dir west
     pip_install pyelftools
+
+    # Zephyr SDK
+    wget https://github.com/zephyrproject-rtos/sdk-ng/releases/download/v0.17.4/zephyr-sdk-0.17.4_linux-x86_64.tar.xz
+    tar -xf zephyr-sdk-0.17.4_linux-x86_64.tar.xz
+    rm -f zephyr-sdk-0.17.4_linux-x86_64.tar.xz*
+    # Setup is deferred until later; this gets symlinked into another folder by the test in trunk.yml.
+    #./zephyr-sdk-0.17.4/setup.sh -c -t arm-zephyr-eabi
 }
 
 install_prerequiresites
@@ -98,5 +98,23 @@ ARG QNN_SDK
 
 ARG MEDIATEK_SDK
 
+ARG CUDA_WINDOWS_CROSS_COMPILE
+ARG CUDA_VERSION
+COPY ./common/install_cuda.sh install_cuda.sh
+COPY ./common/install_cuda_windows_cross_compile.sh install_cuda_windows_cross_compile.sh
+COPY ./common/utils.sh utils.sh
+RUN if [ -n "${CUDA_WINDOWS_CROSS_COMPILE}" ]; then \
+    CUDA_VERSION=${CUDA_VERSION} bash ./install_cuda.sh && \
+    bash ./install_cuda_windows_cross_compile.sh; \
+    fi
+RUN rm -f install_cuda.sh install_cuda_windows_cross_compile.sh utils.sh
+# Set up CUDA environment for Linux compilation (nvcc, etc.)
+ENV CUDA_HOME=/usr/local/cuda
+ENV PATH=${CUDA_HOME}/bin:${PATH}
+# Ensure system libstdc++ is found before conda's (GLIBCXX_3.4.30 compatibility)
+ENV LD_LIBRARY_PATH=/usr/lib/x86_64-linux-gnu:${CUDA_HOME}/lib64:${LD_LIBRARY_PATH}
+# Windows CUDA for cross-compilation
+ENV WINDOWS_CUDA_HOME=/opt/cuda-windows/extracted/cuda_cudart/cudart
+
 USER ci-user
 CMD ["bash"]
@@ -60,11 +60,13 @@ OUTPUT_DIR="${4:-.}"
 case "$DEVICE" in
   cuda)
     ;;
+  cuda-windows)
+    ;;
   metal)
     ;;
   *)
     echo "Error: Unsupported device '$DEVICE'"
-    echo "Supported devices: cuda, metal"
+    echo "Supported devices: cuda, cuda-windows, metal"
     exit 1
     ;;
 esac
@@ -104,10 +106,6 @@ case "$HF_MODEL" in
     PREPROCESSOR_OUTPUT=""
     ;;
   nvidia/parakeet-tdt)
-    if [ "$DEVICE" = "metal" ]; then
-      echo "Error: Export for device 'metal' is not yet tested for model '$HF_MODEL'"
-      exit 1
-    fi
     MODEL_NAME="parakeet"
     TASK=""
     MAX_SEQ_LEN=""
@@ -181,7 +179,7 @@ if [ -n "$MAX_SEQ_LEN" ]; then
 fi
 
 DEVICE_ARG=""
-if [ "$DEVICE" = "cuda" ]; then
+if [ "$DEVICE" = "cuda" ] || [ "$DEVICE" = "cuda-windows" ]; then
   DEVICE_ARG="--device cuda"
 fi
 
@@ -203,10 +201,17 @@ if [ -n "$PREPROCESSOR_OUTPUT" ]; then
       --output_file $PREPROCESSOR_OUTPUT
 fi
 
+# Determine blob file name - cuda and cuda-windows both use aoti_cuda_blob.ptd
+if [ "$DEVICE" = "cuda" ] || [ "$DEVICE" = "cuda-windows" ]; then
+  BLOB_FILE="aoti_cuda_blob.ptd"
+else
+  BLOB_FILE="aoti_${DEVICE}_blob.ptd"
+fi
+
 test -f model.pte
 # CUDA saves named data to separate .ptd file, Metal embeds in .pte
-if [ "$DEVICE" = "cuda" ]; then
-  test -f aoti_cuda_blob.ptd
+if [ "$DEVICE" = "cuda" ] || [ "$DEVICE" = "cuda-windows" ]; then
+  test -f $BLOB_FILE
 fi
 if [ -n "$PREPROCESSOR_OUTPUT" ]; then
   test -f $PREPROCESSOR_OUTPUT
@@ -217,8 +222,8 @@ echo "::group::Store $MODEL_NAME Artifacts"
 mkdir -p "${OUTPUT_DIR}"
 mv model.pte "${OUTPUT_DIR}/"
 # CUDA saves named data to separate .ptd file, Metal embeds in .pte
-if [ "$DEVICE" = "cuda" ]; then
-  mv aoti_cuda_blob.ptd "${OUTPUT_DIR}/"
+if [ "$DEVICE" = "cuda" ] || [ "$DEVICE" = "cuda-windows" ]; then
+  mv $BLOB_FILE "${OUTPUT_DIR}/"
 fi
 if [ -n "$PREPROCESSOR_OUTPUT" ]; then
   mv $PREPROCESSOR_OUTPUT "${OUTPUT_DIR}/"
 
@@ -1,11 +1,16 @@
 #!/bin/bash
-# Copyright 2024 Arm Limited and/or its affiliates.
+# Copyright 2024,2026 Arm Limited and/or its affiliates.
 #
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
 # NB: This function could be used to install Arm dependencies
 # Setup arm example environment (including TOSA tools)
-git config --global user.email "github_executorch@arm.com"
-git config --global user.name "Github Executorch"
+# Configure git user only if not already set
+if ! git config --get user.name >/dev/null 2>&1; then
+    git config --global user.name "Github Executorch"
+fi
+if ! git config --get user.email >/dev/null 2>&1; then
+    git config --global user.email "github_executorch@arm.com"
+fi
 bash examples/arm/setup.sh --i-agree-to-the-contained-eula ${@:-}
Original file line number	Diff line number	Diff line change
`@@ -1 +1 @@`
`1`		`-732b11313b2006b4d8649500eaf5567ec6ac1e49`
	`1`	`+f8aa919593cc51301ade73a2ee5491582521ab80`