27 changes: 21 additions & 6 deletions .github/workflows/cpp_server_build_test_release.yml
@@ -372,10 +372,16 @@ jobs:
test_type: whisper
backend: npu
runner: [rai300_400, Windows]
# Stable Diffusion
- name: stable-diffusion
# Stable Diffusion (CPU)
- name: stable-diffusion (cpu)
test_type: sd
backend: ""
runner: [rai300_400, Windows]
# Stable Diffusion (ROCm)
- name: stable-diffusion (rocm)
test_type: sd
backend: rocm
runner: [stx-halo, Windows]
# Text to speech
- name: text-to-speech
test_type: tts
@@ -438,7 +444,11 @@ jobs:
}
}
"sd" {
& $venvPython test/server_sd.py --server-binary $serverExe
if ("${{ matrix.backend }}") {
& $venvPython test/server_sd.py --backend ${{ matrix.backend }} --server-binary $serverExe
} else {
& $venvPython test/server_sd.py --server-binary $serverExe
}
}
"tts" {
& $venvPython test/server_tts.py --server-binary $serverExe
@@ -473,9 +483,10 @@ jobs:
test_type: llm
backend: rocm
runner: [stx-halo, Linux]
# Stable Diffusion
- name: stable-diffusion
# Stable Diffusion (CPU)
- name: stable-diffusion (cpu)
test_type: sd
backend: ""
runner: [rai300_400, Linux]
# Text to speech
- name: text-to-speech
@@ -522,7 +533,11 @@ jobs:
$VENV_PYTHON test/server_llm.py --wrapped-server llamacpp --backend ${{ matrix.backend }} --server-binary lemonade-server
;;
sd)
$VENV_PYTHON test/server_sd.py --server-binary lemonade-server
if [ -n "${{ matrix.backend }}" ]; then
$VENV_PYTHON test/server_sd.py --backend ${{ matrix.backend }} --server-binary lemonade-server
else
$VENV_PYTHON test/server_sd.py --server-binary lemonade-server
fi
;;
tts)
$VENV_PYTHON test/server_tts.py --server-binary lemonade-server
66 changes: 66 additions & 0 deletions examples/api_image_generation_rocm.py
@@ -0,0 +1,66 @@
#!/usr/bin/env python3
"""
ROCm GPU Image Generation Example

Demonstrates AMD GPU-accelerated image generation with stable-diffusion.cpp.

Prerequisites:
pip install openai requests
lemonade-server serve --sdcpp rocm # Start server with ROCm backend

Usage:
python api_image_generation_rocm.py
"""

import base64
import time
import requests
from openai import OpenAI

# Connect to local lemonade server
BASE_URL = "http://localhost:8000/api/v1"
client = OpenAI(base_url=BASE_URL, api_key="not-needed")

# Models to test (name, steps, cfg_scale)
MODELS = [
("SD-Turbo", 4, 1.0),
("SD-1.5", 20, 7.0),
("SDXL-Turbo", 4, 1.0),
("SDXL-Base-1.0", 20, 7.0),
]

prompt = "A majestic dragon breathing fire over a medieval castle"

print("Testing ROCm-accelerated image generation\n")

for model, steps, cfg_scale in MODELS:
print(f"Generating with {model}...")

# Load model with ROCm backend
requests.post(
f"{BASE_URL}/load",
json={"model_name": model, "sd-cpp_backend": "rocm"},
timeout=300,
)

# Generate image
start = time.time()
response = client.images.generate(
model=model,
prompt=prompt,
size="512x512",
n=1,
response_format="b64_json",
extra_body={"steps": steps, "cfg_scale": cfg_scale},
)
elapsed = time.time() - start

# Save image
image_data = base64.b64decode(response.data[0].b64_json)
filename = f"{model.lower().replace('-', '_')}_rocm.png"
with open(filename, "wb") as f:
f.write(image_data)

print(f" ✓ Generated in {elapsed:.2f}s → {filename}\n")

print("Done!")
6 changes: 5 additions & 1 deletion src/cpp/include/lemon/backends/sd_server.h
@@ -13,7 +13,8 @@ namespace backends {
class SDServer : public WrappedServer, public IImageServer {
public:
explicit SDServer(const std::string& log_level = "info",
ModelManager* model_manager = nullptr);
ModelManager* model_manager = nullptr,
const std::string& backend = "cpu");

~SDServer() override;

@@ -39,6 +40,9 @@ class SDServer : public WrappedServer, public IImageServer {
// IImageServer implementation
json image_generations(const json& request) override;

private:
std::string backend_;

private:
// Server executable helper
std::string find_executable_in_install_dir(const std::string& install_dir);
5 changes: 4 additions & 1 deletion src/cpp/resources/backend_versions.json
@@ -12,7 +12,10 @@
"rocm": "v1.8.2",
"npu": "v1.8.2"
},
"sd-cpp": "master-471-7010bb4",
"sd-cpp": {
"cpu": "master-471-7010bb4",
"rocm": "master-487-43e829f"
},
"ryzenai-server": "v1.0.2",
"flm": {
"version": "v0.9.27",
89 changes: 72 additions & 17 deletions src/cpp/server/backends/sd_server.cpp
@@ -5,6 +5,7 @@
#include "lemon/utils/path_utils.h"
#include "lemon/utils/json_utils.h"
#include "lemon/error_types.h"
#include "lemon/system_info.h"
#include <httplib.h>
#include <iostream>
#include <filesystem>
@@ -18,18 +19,30 @@ namespace lemon {
namespace backends {

// Helper to get stable-diffusion.cpp version from configuration
static std::string get_sd_version() {
static std::string get_sd_version(const std::string& backend = "cpu") {
std::string config_path = utils::get_resource_path("resources/backend_versions.json");

try {
json config = utils::JsonUtils::load_from_file(config_path);

if (!config.contains("sd-cpp") || !config["sd-cpp"].is_string()) {
throw std::runtime_error("backend_versions.json is missing 'sd-cpp' version");
if (!config.contains("sd-cpp")) {
throw std::runtime_error("backend_versions.json is missing 'sd-cpp'");
}

std::string version = config["sd-cpp"].get<std::string>();
std::cout << "[SDServer] Using sd-cpp version from config: " << version << std::endl;
std::string version;
// Support both old string format and new object format with backend keys
if (config["sd-cpp"].is_string()) {
version = config["sd-cpp"].get<std::string>();
} else if (config["sd-cpp"].is_object()) {
if (!config["sd-cpp"].contains(backend)) {
throw std::runtime_error("backend_versions.json sd-cpp missing backend: " + backend);
}
version = config["sd-cpp"][backend].get<std::string>();
} else {
throw std::runtime_error("backend_versions.json 'sd-cpp' has invalid format");
}

std::cout << "[SDServer] Using sd-cpp version for backend '" << backend << "': " << version << std::endl;
return version;

} catch (const std::exception& e) {
Expand All @@ -46,16 +59,19 @@ static std::string get_sd_version() {
}

// Helper to get the install directory for sd executable
static std::string get_sd_install_dir() {
return (fs::path(get_downloaded_bin_dir()) / "sd-cpp").string();
static std::string get_sd_install_dir(const std::string& backend = "cpu") {
return (fs::path(get_downloaded_bin_dir()) / "sd-cpp" / backend).string();
}

SDServer::SDServer(const std::string& log_level,
ModelManager* model_manager)
: WrappedServer("sd-server", log_level, model_manager) {
ModelManager* model_manager,
const std::string& backend)
: WrappedServer("sd-server", log_level, model_manager),
backend_(backend) {

if (is_debug()) {
std::cout << "[SDServer] Created with log_level=" << log_level << std::endl;
std::cout << "[SDServer] Created with log_level=" << log_level
<< ", backend=" << backend << std::endl;
}
}

@@ -132,7 +148,7 @@ std::string SDServer::find_executable_in_install_dir(const std::string& install_


void SDServer::install(const std::string& /* backend */) {
std::string install_dir = get_sd_install_dir();
std::string install_dir = get_sd_install_dir(backend_);

// Check if already installed
std::string exe_path = find_executable_in_install_dir(install_dir);
@@ -141,10 +157,10 @@ void SDServer::install(const std::string& /* backend */) {
return;
}

std::cout << "[SDServer] Installing stable-diffusion.cpp server..." << std::endl;
std::cout << "[SDServer] Installing stable-diffusion.cpp server (backend: " << backend_ << ")..." << std::endl;

// Get version and construct download URL
std::string expected_version = get_sd_version();
std::string expected_version = get_sd_version(backend_);
std::string repo = "leejet/stable-diffusion.cpp";

// Transform version for URL (master-NNN-HASH -> master-HASH)
@@ -158,7 +174,28 @@ void SDServer::install(const std::string& /* backend */) {
}
}

// ADDED: ROCm backend selection for AMD GPU support
std::string filename;
if (backend_ == "rocm") {
// Validate ROCm architecture support
std::string target_arch = lemon::SystemInfo::get_rocm_arch();
if (target_arch.empty()) {
throw std::runtime_error(
lemon::SystemInfo::get_unsupported_backend_error("sd-cpp", "rocm")
);
}

#ifdef _WIN32
filename = "sd-" + short_version + "-bin-win-rocm-x64.zip";
#elif defined(__linux__)
filename = "sd-" + short_version + "-bin-linux-rocm-x64.zip";
#else
throw std::runtime_error("ROCm sd.cpp only supported on Windows and Linux");
#endif
std::cout << "[SDServer] Using ROCm GPU backend" << std::endl;
} else {
// CPU build (default) - unchanged from original
#ifdef _WIN32
// Windows CPU build with AVX2
filename = "sd-" + short_version + "-bin-win-avx2-x64.zip";
Expand All @@ -171,6 +208,7 @@ void SDServer::install(const std::string& /* backend */) {
#else
throw std::runtime_error("Unsupported platform for stable-diffusion.cpp");
#endif
}

std::string url = "https://github.com/" + repo + "/releases/download/" +
expected_version + "/" + filename;
@@ -331,7 +369,7 @@ void SDServer::load(const std::string& model_name,
model_path_ = model_path;

// Get sd-server executable path
std::string exe_path = find_executable_in_install_dir(get_sd_install_dir());
std::string exe_path = find_executable_in_install_dir(get_sd_install_dir(backend_));
if (exe_path.empty()) {
throw std::runtime_error("sd-server executable not found");
}
@@ -342,7 +380,7 @@ void SDServer::load(const std::string& model_name,
throw std::runtime_error("Failed to find an available port");
}

std::cout << "[SDServer] Starting server on port " << port_ << std::endl;
std::cout << "[SDServer] Starting server on port " << port_ << " (backend: " << backend_ << ")" << std::endl;

// Build command line arguments
std::vector<std::string> args = {
@@ -354,10 +392,12 @@ void SDServer::load(const std::string& model_name,
args.push_back("-v");
}

// Set up environment variables for Linux (LD_LIBRARY_PATH)
// Set up environment variables
std::vector<std::pair<std::string, std::string>> env_vars;
#ifndef _WIN32
fs::path exe_dir = fs::path(exe_path).parent_path();

#ifndef _WIN32
// For Linux, always set LD_LIBRARY_PATH to include executable directory
std::string lib_path = exe_dir.string();

const char* existing_ld_path = std::getenv("LD_LIBRARY_PATH");
@@ -369,6 +409,21 @@ void SDServer::load(const std::string& model_name,
if (is_debug()) {
std::cout << "[SDServer] Setting LD_LIBRARY_PATH=" << lib_path << std::endl;
}
#else
// ROCm builds on Windows require hipblaslt.dll, rocblas.dll, amdhip64.dll, etc.
// These DLLs are distributed alongside sd-server.exe but need PATH to be set for loading
if (backend_ == "rocm") {
// Add executable directory to PATH for ROCm runtime DLLs
// This allows the sd-server.exe to find required HIP/ROCm libraries at runtime
std::string new_path = exe_dir.string();
const char* existing_path = std::getenv("PATH");
if (existing_path && strlen(existing_path) > 0) {
new_path = new_path + ";" + std::string(existing_path);
}
env_vars.push_back({"PATH", new_path});

std::cout << "[SDServer] ROCm backend: added " << exe_dir.string() << " to PATH" << std::endl;
}
#endif

// Launch the server process
11 changes: 10 additions & 1 deletion src/cpp/server/recipe_options.cpp
@@ -15,6 +15,7 @@ static const json DEFAULTS = {
{"llamacpp_backend", "vulkan"}, // Will be overridden dynamically
#endif
{"llamacpp_args", ""},
{"sd-cpp_backend", "cpu"}, // sd.cpp backend selection (cpu or rocm)
{"whispercpp_backend", "npu"},
// Image generation defaults (for sd-cpp recipe)
{"steps", 20},
@@ -44,6 +45,14 @@ static const json CLI_OPTIONS = {
{"envname", "LEMONADE_LLAMACPP_ARGS"},
{"help", "Custom arguments to pass to llama-server (must not conflict with managed args)"}
}},
// sd.cpp backend selection option
{"--sdcpp", {
{"option_name", "sd-cpp_backend"},
{"type_name", "BACKEND"},
{"allowed_values", {"cpu", "rocm"}},
{"envname", "LEMONADE_SDCPP"},
{"help", "SD.cpp backend to use (cpu for CPU, rocm for AMD GPU)"}
}},
// ASR options
{"--whispercpp", {
{"option_name", "whispercpp_backend"},
@@ -87,7 +96,7 @@ static std::vector<std::string> get_keys_for_recipe(const std::string& recipe) {
} else if (recipe == "oga-npu" || recipe == "oga-hybrid" || recipe == "oga-cpu" || recipe == "ryzenai" || recipe == "flm") {
return {"ctx_size"};
} else if (recipe == "sd-cpp") {
return {"steps", "cfg_scale", "width", "height"};
return {"sd-cpp_backend", "steps", "cfg_scale", "width", "height"};
} else {
return {};
}
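
With the option above in place, a minimal sketch of how the new backend selection would be exercised from a shell. The `--sdcpp` flag, the `LEMONADE_SDCPP` environment variable, and the `cpu`/`rocm` values come from the CLI_OPTIONS table above, and the `serve` subcommand is taken from the example's docstring; exact invocation details may differ.

```bash
# Pick the sd.cpp backend at server start via the new flag
lemonade-server serve --sdcpp rocm   # AMD GPU build of stable-diffusion.cpp
lemonade-server serve --sdcpp cpu    # CPU build (the default)

# Equivalent selection through the environment variable declared above
LEMONADE_SDCPP=rocm lemonade-server serve
```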
9 changes: 7 additions & 2 deletions src/cpp/server/router.cpp
@@ -160,8 +160,13 @@ std::unique_ptr<WrappedServer> Router::create_backend_server(const ModelInfo& mo
std::cout << "[Router] Creating Kokoro backend" << std::endl;
new_server = std::make_unique<backends::KokoroServer>(log_level_, model_manager_);
} else if (model_info.recipe == "sd-cpp") {
std::cout << "[Router] Creating SDServer backend" << std::endl;
new_server = std::make_unique<backends::SDServer>(log_level_, model_manager_);
// Pass sd-cpp_backend from default_options_ to SDServer
std::string backend = "cpu"; // default
if (default_options_.contains("sd-cpp_backend") && default_options_["sd-cpp_backend"].is_string()) {
backend = default_options_["sd-cpp_backend"].get<std::string>();
}
std::cout << "[Router] Creating SDServer backend (backend: " << backend << ")" << std::endl;
new_server = std::make_unique<backends::SDServer>(log_level_, model_manager_, backend);
} else if (model_info.recipe == "flm") {
std::cout << "[Router] Creating FastFlowLM backend" << std::endl;
new_server = std::make_unique<backends::FastFlowLMServer>(log_level_, model_manager_);