27 changes: 21 additions & 6 deletions .github/workflows/cpp_server_build_test_release.yml
@@ -372,10 +372,16 @@ jobs:
test_type: whisper
backend: npu
runner: [rai300_400, Windows]
# Stable Diffusion
- name: stable-diffusion
# Stable Diffusion (CPU)
- name: stable-diffusion (cpu)
test_type: sd
backend: ""
runner: [rai300_400, Windows]
# Stable Diffusion (ROCm)
- name: stable-diffusion (rocm)
test_type: sd
backend: rocm
runner: [stx-halo, Windows]
# Text to speech
- name: text-to-speech
test_type: tts
@@ -438,7 +444,11 @@ jobs:
}
}
"sd" {
& $venvPython test/server_sd.py --server-binary $serverExe
if ("${{ matrix.backend }}") {
& $venvPython test/server_sd.py --backend ${{ matrix.backend }} --server-binary $serverExe
} else {
& $venvPython test/server_sd.py --server-binary $serverExe
}
}
"tts" {
& $venvPython test/server_tts.py --server-binary $serverExe
@@ -473,9 +483,10 @@ jobs:
test_type: llm
backend: rocm
runner: [stx-halo, Linux]
# Stable Diffusion
- name: stable-diffusion
# Stable Diffusion (CPU)
- name: stable-diffusion (cpu)
test_type: sd
backend: ""
runner: [rai300_400, Linux]
# Text to speech
- name: text-to-speech
@@ -522,7 +533,11 @@ jobs:
$VENV_PYTHON test/server_llm.py --wrapped-server llamacpp --backend ${{ matrix.backend }} --server-binary lemonade-server
;;
sd)
$VENV_PYTHON test/server_sd.py --server-binary lemonade-server
if [ -n "${{ matrix.backend }}" ]; then
$VENV_PYTHON test/server_sd.py --backend ${{ matrix.backend }} --server-binary lemonade-server
else
$VENV_PYTHON test/server_sd.py --server-binary lemonade-server
fi
;;
tts)
$VENV_PYTHON test/server_tts.py --server-binary lemonade-server
66 changes: 66 additions & 0 deletions examples/api_image_generation_rocm.py
@@ -0,0 +1,66 @@
#!/usr/bin/env python3
"""
ROCm GPU Image Generation Example

Demonstrates AMD GPU-accelerated image generation with stable-diffusion.cpp.

Prerequisites:
pip install openai requests
lemonade-server serve --sdcpp rocm # Start server with ROCm backend

Usage:
python api_image_generation_rocm.py
"""

import base64
import time
import requests
from openai import OpenAI

# Connect to local lemonade server
BASE_URL = "http://localhost:8000/api/v1"
client = OpenAI(base_url=BASE_URL, api_key="not-needed")

# Models to test (name, steps, cfg_scale)
MODELS = [
("SD-Turbo", 4, 1.0),
("SD-1.5", 20, 7.0),
("SDXL-Turbo", 4, 1.0),
("SDXL-Base-1.0", 20, 7.0),
]

prompt = "A majestic dragon breathing fire over a medieval castle"

print("Testing ROCm-accelerated image generation\n")

for model, steps, cfg_scale in MODELS:
print(f"Generating with {model}...")

# Load model with ROCm backend
requests.post(
f"{BASE_URL}/load",
json={"model_name": model, "sd-cpp_backend": "rocm"},
timeout=300,
)

# Generate image
start = time.time()
response = client.images.generate(
model=model,
prompt=prompt,
size="512x512",
n=1,
response_format="b64_json",
extra_body={"steps": steps, "cfg_scale": cfg_scale},
)
elapsed = time.time() - start

# Save image
image_data = base64.b64decode(response.data[0].b64_json)
filename = f"{model.lower().replace('-', '_')}_rocm.png"
with open(filename, "wb") as f:
f.write(image_data)

print(f" ✓ Generated in {elapsed:.2f}s → {filename}\n")

print("Done!")
6 changes: 5 additions & 1 deletion src/cpp/include/lemon/backends/sd_server.h
@@ -13,7 +13,8 @@ namespace backends {
class SDServer : public WrappedServer, public IImageServer {
public:
explicit SDServer(const std::string& log_level = "info",
ModelManager* model_manager = nullptr);
ModelManager* model_manager = nullptr,
const std::string& backend = "cpu");

~SDServer() override;

@@ -39,6 +40,9 @@ class SDServer : public WrappedServer, public IImageServer {
// IImageServer implementation
json image_generations(const json& request) override;

private:
std::string backend_;

private:
// Server executable helper
std::string find_executable_in_install_dir(const std::string& install_dir);
5 changes: 4 additions & 1 deletion src/cpp/resources/backend_versions.json
@@ -12,7 +12,10 @@
"rocm": "v1.8.2",
"npu": "v1.8.2"
},
"sd-cpp": "master-471-7010bb4",
"sd-cpp": {
"cpu": "master-471-7010bb4",
"rocm": "master-487-43e829f"
},
"ryzenai-server": "v1.0.2",
"flm": {
"version": "v0.9.27",
89 changes: 72 additions & 17 deletions src/cpp/server/backends/sd_server.cpp
@@ -5,6 +5,7 @@
#include "lemon/utils/path_utils.h"
#include "lemon/utils/json_utils.h"
#include "lemon/error_types.h"
#include "lemon/system_info.h"
#include <httplib.h>
#include <iostream>
#include <filesystem>
@@ -18,18 +19,30 @@ namespace lemon {
namespace backends {

// Helper to get stable-diffusion.cpp version from configuration
static std::string get_sd_version() {
static std::string get_sd_version(const std::string& backend = "cpu") {
std::string config_path = utils::get_resource_path("resources/backend_versions.json");

try {
json config = utils::JsonUtils::load_from_file(config_path);

if (!config.contains("sd-cpp") || !config["sd-cpp"].is_string()) {
throw std::runtime_error("backend_versions.json is missing 'sd-cpp' version");
if (!config.contains("sd-cpp")) {
throw std::runtime_error("backend_versions.json is missing 'sd-cpp'");
}

std::string version = config["sd-cpp"].get<std::string>();
std::cout << "[SDServer] Using sd-cpp version from config: " << version << std::endl;
std::string version;
// Support both old string format and new object format with backend keys
if (config["sd-cpp"].is_string()) {
version = config["sd-cpp"].get<std::string>();
} else if (config["sd-cpp"].is_object()) {
if (!config["sd-cpp"].contains(backend)) {
throw std::runtime_error("backend_versions.json sd-cpp missing backend: " + backend);
}
version = config["sd-cpp"][backend].get<std::string>();
} else {
throw std::runtime_error("backend_versions.json 'sd-cpp' has invalid format");
}

std::cout << "[SDServer] Using sd-cpp version for backend '" << backend << "': " << version << std::endl;
return version;

} catch (const std::exception& e) {
Expand All @@ -46,16 +59,19 @@ static std::string get_sd_version() {
}

// Helper to get the install directory for sd executable
static std::string get_sd_install_dir() {
return (fs::path(get_downloaded_bin_dir()) / "sd-cpp").string();
static std::string get_sd_install_dir(const std::string& backend = "cpu") {
return (fs::path(get_downloaded_bin_dir()) / "sd-cpp" / backend).string();
}

SDServer::SDServer(const std::string& log_level,
ModelManager* model_manager)
: WrappedServer("sd-server", log_level, model_manager) {
ModelManager* model_manager,
const std::string& backend)
: WrappedServer("sd-server", log_level, model_manager),
backend_(backend) {

if (is_debug()) {
std::cout << "[SDServer] Created with log_level=" << log_level << std::endl;
std::cout << "[SDServer] Created with log_level=" << log_level
<< ", backend=" << backend << std::endl;
}
}

@@ -132,7 +148,7 @@ std::string SDServer::find_executable_in_install_dir(const std::string& install_


void SDServer::install(const std::string& /* backend */) {
std::string install_dir = get_sd_install_dir();
std::string install_dir = get_sd_install_dir(backend_);

// Check if already installed
std::string exe_path = find_executable_in_install_dir(install_dir);
@@ -141,10 +157,10 @@ void SDServer::install(const std::string& /* backend */) {
return;
}

std::cout << "[SDServer] Installing stable-diffusion.cpp server..." << std::endl;
std::cout << "[SDServer] Installing stable-diffusion.cpp server (backend: " << backend_ << ")..." << std::endl;

// Get version and construct download URL
std::string expected_version = get_sd_version();
std::string expected_version = get_sd_version(backend_);
std::string repo = "leejet/stable-diffusion.cpp";

// Transform version for URL (master-NNN-HASH -> master-HASH)
@@ -158,7 +174,28 @@ void SDServer::install(const std::string& /* backend */) {
}
}

// ADDED: ROCm backend selection for AMD GPU support
std::string filename;
if (backend_ == "rocm") {
// Validate ROCm architecture support
std::string target_arch = lemon::SystemInfo::get_rocm_arch();
if (target_arch.empty()) {
throw std::runtime_error(
lemon::SystemInfo::get_unsupported_backend_error("sd-cpp", "rocm")
);
}

#ifdef _WIN32
filename = "sd-" + short_version + "-bin-win-rocm-x64.zip";
#elif defined(__linux__)
filename = "sd-" + short_version + "-bin-linux-rocm-x64.zip";
#else
throw std::runtime_error("ROCm sd.cpp only supported on Windows and Linux");
#endif
std::cout << "[SDServer] Using ROCm GPU backend" << std::endl;
} else {
// CPU build (default) - unchanged from original
#ifdef _WIN32
// Windows CPU build with AVX2
filename = "sd-" + short_version + "-bin-win-avx2-x64.zip";
Expand All @@ -171,6 +208,7 @@ void SDServer::install(const std::string& /* backend */) {
#else
throw std::runtime_error("Unsupported platform for stable-diffusion.cpp");
#endif
}

std::string url = "https://github.com/" + repo + "/releases/download/" +
expected_version + "/" + filename;
@@ -331,7 +369,7 @@ void SDServer::load(const std::string& model_name,
model_path_ = model_path;

// Get sd-server executable path
std::string exe_path = find_executable_in_install_dir(get_sd_install_dir());
std::string exe_path = find_executable_in_install_dir(get_sd_install_dir(backend_));
if (exe_path.empty()) {
throw std::runtime_error("sd-server executable not found");
}
@@ -342,7 +380,7 @@ void SDServer::load(const std::string& model_name,
throw std::runtime_error("Failed to find an available port");
}

std::cout << "[SDServer] Starting server on port " << port_ << std::endl;
std::cout << "[SDServer] Starting server on port " << port_ << " (backend: " << backend_ << ")" << std::endl;

// Build command line arguments
std::vector<std::string> args = {
@@ -354,10 +392,12 @@ void SDServer::load(const std::string& model_name,
args.push_back("-v");
}

// Set up environment variables for Linux (LD_LIBRARY_PATH)
// Set up environment variables
std::vector<std::pair<std::string, std::string>> env_vars;
#ifndef _WIN32
fs::path exe_dir = fs::path(exe_path).parent_path();

#ifndef _WIN32
// For Linux, always set LD_LIBRARY_PATH to include executable directory
std::string lib_path = exe_dir.string();

const char* existing_ld_path = std::getenv("LD_LIBRARY_PATH");
@@ -369,6 +409,21 @@ void SDServer::load(const std::string& model_name,
if (is_debug()) {
std::cout << "[SDServer] Setting LD_LIBRARY_PATH=" << lib_path << std::endl;
}
#else
// ROCm builds on Windows require hipblaslt.dll, rocblas.dll, amdhip64.dll, etc.
// These DLLs are distributed alongside sd-server.exe but need PATH to be set for loading
if (backend_ == "rocm") {
// Add executable directory to PATH for ROCm runtime DLLs
// This allows the sd-server.exe to find required HIP/ROCm libraries at runtime
std::string new_path = exe_dir.string();
const char* existing_path = std::getenv("PATH");
if (existing_path && strlen(existing_path) > 0) {
new_path = new_path + ";" + std::string(existing_path);
}
env_vars.push_back({"PATH", new_path});

std::cout << "[SDServer] ROCm backend: added " << exe_dir.string() << " to PATH" << std::endl;
}
#endif

// Launch the server process
11 changes: 10 additions & 1 deletion src/cpp/server/recipe_options.cpp
@@ -15,6 +15,7 @@ static const json DEFAULTS = {
{"llamacpp_backend", "vulkan"}, // Will be overridden dynamically
#endif
{"llamacpp_args", ""},
{"sd-cpp_backend", "cpu"}, // sd.cpp backend selection (cpu or rocm)
{"whispercpp_backend", "npu"},
// Image generation defaults (for sd-cpp recipe)
{"steps", 20},
@@ -44,6 +45,14 @@ static const json CLI_OPTIONS = {
{"envname", "LEMONADE_LLAMACPP_ARGS"},
{"help", "Custom arguments to pass to llama-server (must not conflict with managed args)"}
}},
// sd.cpp backend selection option
{"--sdcpp", {
{"option_name", "sd-cpp_backend"},
{"type_name", "BACKEND"},
{"allowed_values", {"cpu", "rocm"}},
{"envname", "LEMONADE_SDCPP"},
{"help", "SD.cpp backend to use (cpu for CPU, rocm for AMD GPU)"}
}},
// ASR options
{"--whispercpp", {
{"option_name", "whispercpp_backend"},
@@ -87,7 +96,7 @@ static std::vector<std::string> get_keys_for_recipe(const std::string& recipe) {
} else if (recipe == "oga-npu" || recipe == "oga-hybrid" || recipe == "oga-cpu" || recipe == "ryzenai" || recipe == "flm") {
return {"ctx_size"};
} else if (recipe == "sd-cpp") {
return {"steps", "cfg_scale", "width", "height"};
return {"sd-cpp_backend", "steps", "cfg_scale", "width", "height"};
} else {
return {};
}
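
With the option above in place, a minimal sketch of how the new backend selection would be exercised from a shell. The `--sdcpp` flag, the `LEMONADE_SDCPP` environment variable, and the `cpu`/`rocm` values come from the CLI_OPTIONS table above, and the `serve` subcommand is taken from the example's docstring; exact invocation details may differ.

```bash
# Pick the sd.cpp backend at server start via the new flag
lemonade-server serve --sdcpp rocm   # AMD GPU build of stable-diffusion.cpp
lemonade-server serve --sdcpp cpu    # CPU build (the default)

# Equivalent selection through the environment variable declared above
LEMONADE_SDCPP=rocm lemonade-server serve
```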
9 changes: 7 additions & 2 deletions src/cpp/server/router.cpp
@@ -160,8 +160,13 @@ std::unique_ptr<WrappedServer> Router::create_backend_server(const ModelInfo& mo
std::cout << "[Router] Creating Kokoro backend" << std::endl;
new_server = std::make_unique<backends::KokoroServer>(log_level_, model_manager_);
} else if (model_info.recipe == "sd-cpp") {
std::cout << "[Router] Creating SDServer backend" << std::endl;
new_server = std::make_unique<backends::SDServer>(log_level_, model_manager_);
// Pass sd-cpp_backend from default_options_ to SDServer
std::string backend = "cpu"; // default
if (default_options_.contains("sd-cpp_backend") && default_options_["sd-cpp_backend"].is_string()) {
backend = default_options_["sd-cpp_backend"].get<std::string>();
}
std::cout << "[Router] Creating SDServer backend (backend: " << backend << ")" << std::endl;
new_server = std::make_unique<backends::SDServer>(log_level_, model_manager_, backend);
} else if (model_info.recipe == "flm") {
std::cout << "[Router] Creating FastFlowLM backend" << std::endl;
new_server = std::make_unique<backends::FastFlowLMServer>(log_level_, model_manager_);