From 15d2654755dcef67a6d64f72b9c20aba81019978 Mon Sep 17 00:00:00 2001
From: Cookie Crumbs <145591499+softcookiepp@users.noreply.github.com>
Date: Wed, 13 Nov 2024 20:08:28 -0800
Subject: [PATCH 1/6] Update stable-diffusion.cpp

---
 stable-diffusion.cpp | 98 ++++++++++++++++++++++++++++++++++++--------
 1 file changed, 81 insertions(+), 17 deletions(-)
diff --git a/stable-diffusion.cpp b/stable-diffusion.cpp
index 4d28a147b..a8dd1803a 100644
--- a/stable-diffusion.cpp
+++ b/stable-diffusion.cpp
@@ -123,16 +123,16 @@ class StableDiffusionGGML {
         } else if (rng_type == CUDA_RNG) {
             rng = std::make_shared<PhiloxRNG>();
         }
-    }
-
+    }					
+	
     ~StableDiffusionGGML() {
         if (clip_backend != backend) {
             ggml_backend_free(clip_backend);
         }
-        if (control_net_backend != backend) {
+        if (control_net_backend != backend && control_net_backend != clip_backend) {
             ggml_backend_free(control_net_backend);
         }
-        if (vae_backend != backend) {
+        if (vae_backend != backend && vae_backend != clip_backend && vae_backend != control_net_backend) {
             ggml_backend_free(vae_backend);
         }
         ggml_backend_free(backend);
@@ -153,11 +153,15 @@ class StableDiffusionGGML {
                         schedule_t schedule,
                         bool clip_on_cpu,
                         bool control_net_cpu,
-                        bool vae_on_cpu) {
+                        bool vae_on_cpu,
+                        int model_backend_index,
+						int clip_backend_index,
+						int vae_backend_index) {
         use_tiny_autoencoder = taesd_path.size() > 0;
 #ifdef SD_USE_CUBLAS
         LOG_DEBUG("Using CUDA backend");
-        backend = ggml_backend_cuda_init(0);
+        if (model_backend_index == -1) model_backend_index = 0;
+        backend = ggml_backend_cuda_init(model_backend_index);
 #endif
 #ifdef SD_USE_METAL
         LOG_DEBUG("Using Metal backend");
@@ -166,16 +170,22 @@ class StableDiffusionGGML {
 #endif
 #ifdef SD_USE_VULKAN
         LOG_DEBUG("Using Vulkan backend");
-        for (int device = 0; device < ggml_backend_vk_get_device_count(); ++device) {
-            backend = ggml_backend_vk_init(device);
-        }
-        if (!backend) {
-            LOG_WARN("Failed to initialize Vulkan backend");
-        }
+        if (model_backend_index == -1) {
+			// default behavior, last device selected
+			for (int device = 0; device < ggml_backend_vk_get_device_count(); ++device) {
+				backend = ggml_backend_vk_init(device);
+			}
+			if (!backend) {
+				LOG_WARN("Failed to initialize Vulkan backend");
+			}
+		} else {
+			backend = ggml_backend_vk_init(model_backend_index);
+		}
 #endif
 #ifdef SD_USE_SYCL
         LOG_DEBUG("Using SYCL backend");
-        backend = ggml_backend_sycl_init(0);
+        if (model_backend_index == -1) model_backend_index = 0;
+        backend = ggml_backend_sycl_init(model_backend_index);
 #endif
 
         if (!backend) {
@@ -321,7 +331,29 @@ class StableDiffusionGGML {
             if (clip_on_cpu && !ggml_backend_is_cpu(backend)) {
                 LOG_INFO("CLIP: Using CPU backend");
                 clip_backend = ggml_backend_cpu_init();
-            }
+            } else if (clip_backend_index > -1 && clip_backend_index != model_backend_index) {
+#ifdef SD_USE_CUBLAS
+				LOG_DEBUG("CLIP: Using CUDA backend");
+				clip_backend = ggml_backend_cuda_init(clip_backend_index);
+#endif
+#ifdef SD_USE_VULKAN
+				LOG_DEBUG("CLIP: Using Vulkan backend");
+				clip_backend = ggml_backend_vk_init(clip_backend_index);
+#endif
+#ifdef SD_USE_METAL
+				LOG_DEBUG("CLIP: Using Metal backend");
+				// should be the same
+				clip_backend = backend;
+#endif
+#ifdef SD_USE_SYCL
+				LOG_DEBUG("CLIP: Using SYCL backend");
+				clip_backend = ggml_backend_sycl_init(clip_backend_index);
+#endif
+				if (!clip_backend) {
+					LOG_WARN("No backend device found for CLIP, defaulting to model device.");
+					clip_backend = backend;
+				}
+			}
             if (version == VERSION_SD3_2B || version == VERSION_SD3_5_8B) {
                 cond_stage_model = std::make_shared<SD3CLIPEmbedder>(clip_backend, conditioner_wtype);
                 diffusion_model  = std::make_shared<MMDiTModel>(backend, diffusion_model_wtype, version);
@@ -342,7 +374,33 @@ class StableDiffusionGGML {
                 if (vae_on_cpu && !ggml_backend_is_cpu(backend)) {
                     LOG_INFO("VAE Autoencoder: Using CPU backend");
                     vae_backend = ggml_backend_cpu_init();
-                } else {
+                } else if (vae_backend_index > -1 && vae_backend_index == clip_backend_index) {
+                    // Share CLIP's backend only on an explicit request: with both indices left at
+                    // -1, keep_clip_on_cpu would otherwise move the VAE onto the CPU backend too.
+                    vae_backend = clip_backend;
+                } else if (vae_backend_index > -1 && vae_backend_index != model_backend_index) {
+#ifdef SD_USE_CUBLAS
+                    LOG_DEBUG("VAE Autoencoder: Using CUDA backend");
+                    vae_backend = ggml_backend_cuda_init(vae_backend_index);
+#endif
+#ifdef SD_USE_VULKAN
+                    LOG_DEBUG("VAE Autoencoder: Using Vulkan backend");
+                    vae_backend = ggml_backend_vk_init(vae_backend_index);
+#endif
+#ifdef SD_USE_METAL
+                    LOG_DEBUG("VAE Autoencoder: Using Metal backend");
+                    // no per-index Metal init is done here: reuse the main backend
+                    vae_backend = backend;
+#endif
+#ifdef SD_USE_SYCL
+                    LOG_DEBUG("VAE Autoencoder: Using SYCL backend");
+                    vae_backend = ggml_backend_sycl_init(vae_backend_index);
+#endif
+                    if (!vae_backend) {
+                        LOG_WARN("No backend device found for VAE, defaulting to model device.");
+                        vae_backend = backend;
+                    }
+                } else {
                     vae_backend = backend;
                 }
                 first_stage_model = std::make_shared<AutoEncoderKL>(vae_backend, vae_wtype, vae_decode_only, false, version);
@@ -1035,7 +1093,10 @@ sd_ctx_t* new_sd_ctx(const char* model_path_c_str,
                      enum schedule_t s,
                      bool keep_clip_on_cpu,
                      bool keep_control_net_cpu,
-                     bool keep_vae_on_cpu) {
+                     bool keep_vae_on_cpu,
+					 int model_backend_index,
+					 int clip_backend_index,
+					 int vae_backend_index) {
     sd_ctx_t* sd_ctx = (sd_ctx_t*)malloc(sizeof(sd_ctx_t));
     if (sd_ctx == NULL) {
         return NULL;
@@ -1076,7 +1137,10 @@ sd_ctx_t* new_sd_ctx(const char* model_path_c_str,
                                     s,
                                     keep_clip_on_cpu,
                                     keep_control_net_cpu,
-                                    keep_vae_on_cpu)) {
+                                    keep_vae_on_cpu,
+                                    model_backend_index,
+                                    clip_backend_index,
+									vae_backend_index)) {
         delete sd_ctx->sd;
         sd_ctx->sd = NULL;
         free(sd_ctx);

From 11bc8c42a392299b47e20bb0cf67cc6350f6c27f Mon Sep 17 00:00:00 2001
From: Cookie Crumbs <145591499+softcookiepp@users.noreply.github.com>
Date: Wed, 13 Nov 2024 20:09:02 -0800
Subject: [PATCH 2/6] Update stable-diffusion.h

---
 stable-diffusion.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/stable-diffusion.h b/stable-diffusion.h
index 812e8fc94..aa5b2bf49 100644
--- a/stable-diffusion.h
+++ b/stable-diffusion.h
@@ -142,7 +142,10 @@ SD_API sd_ctx_t* new_sd_ctx(const char* model_path,
                             enum schedule_t s,
                             bool keep_clip_on_cpu,
                             bool keep_control_net_cpu,
-                            bool keep_vae_on_cpu);
+                            bool keep_vae_on_cpu,
+                            int model_backend_index = -1,
+                            int clip_backend_index = -1,
+							int vae_backend_index = -1);
 
 SD_API void free_sd_ctx(sd_ctx_t* sd_ctx);
 

From ead839fc793254bcba9adfe8bc05a77737e0cb63 Mon Sep 17 00:00:00 2001
From: Cookie Crumbs <145591499+softcookiepp@users.noreply.github.com>
Date: Wed, 13 Nov 2024 20:09:50 -0800
Subject: [PATCH 3/6] Update main.cpp

---
 examples/cli/main.cpp | 35 +++++++++++++++++++++++++++++++++--
 1 file changed, 33 insertions(+), 2 deletions(-)

diff --git a/examples/cli/main.cpp b/examples/cli/main.cpp
index f1bdc698b..25faea88f 100644
--- a/examples/cli/main.cpp
+++ b/examples/cli/main.cpp
@@ -5,6 +5,7 @@
 #include <random>
 #include <string>
 #include <vector>
+#include <stdexcept>
 
 // #include "preprocessing.hpp"
 #include "flux.hpp"
@@ -119,6 +120,10 @@ struct SDParams {
     bool canny_preprocess         = false;
     bool color                    = false;
     int upscale_repeats           = 1;
+    // Backend device index per component; -1 means no explicit device was requested.
+    int model_backend_index = -1;
+    int clip_backend_index  = -1;
+    int vae_backend_index   = -1;
 };
 
 void print_params(SDParams params) {
@@ -164,6 +169,9 @@ void print_params(SDParams params) {
     printf("    batch_count:       %d\n", params.batch_count);
     printf("    vae_tiling:        %s\n", params.vae_tiling ? "true" : "false");
     printf("    upscale_repeats:   %d\n", params.upscale_repeats);
+    printf("    model_backend_index: %d\n", params.model_backend_index);
+    printf("    clip_backend_index:  %d\n", params.clip_backend_index);
+    printf("    vae_backend_index:   %d\n", params.vae_backend_index);
 }
 
 void print_usage(int argc, const char* argv[]) {
@@ -219,6 +227,9 @@ void print_usage(int argc, const char* argv[]) {
     printf("  --canny                            apply canny preprocessor (edge detection)\n");
     printf("  --color                            Colors the logging tags according to level\n");
     printf("  -v, --verbose                      print extra info\n");
+    printf("  --model-backend-index N            index of the device the model runs on (default: -1, no override)\n");
+    printf("  --clip-backend-index N             index of the device the CLIP model runs on (default: -1, no override)\n");
+    printf("  --vae-backend-index N              index of the device the VAE model runs on (default: -1, no override)\n");
 }
 
 void parse_args(int argc, const char** argv, SDParams& params) {
@@ -534,7 +545,24 @@ void parse_args(int argc, const char** argv, SDParams& params) {
             params.verbose = true;
         } else if (arg == "--color") {
             params.color = true;
-        } else {
+        } else if (arg == "--model-backend-index" || arg == "--clip-backend-index" ||
+                   arg == "--vae-backend-index") {
+            // Each backend-index option consumes the next argv entry as its value;
+            // refuse to read past the end of argv when that value is missing.
+            if (++i >= argc) {
+                fprintf(stderr, "error: missing value for argument: %s\n", arg.c_str());
+                print_usage(argc, argv);
+                exit(1);
+            }
+            const int backend_index = std::stoi(argv[i]);
+            if (arg == "--model-backend-index") {
+                params.model_backend_index = backend_index;
+            } else if (arg == "--clip-backend-index") {
+                params.clip_backend_index = backend_index;
+            } else {
+                params.vae_backend_index = backend_index;
+            }
+        } else {
             fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
             print_usage(argc, argv);
             exit(1);
@@ -791,7 +819,10 @@ int main(int argc, const char* argv[]) {
                                   params.schedule,
                                   params.clip_on_cpu,
                                   params.control_net_cpu,
-                                  params.vae_on_cpu);
+                                  params.vae_on_cpu,
+                                  params.model_backend_index,
+                                  params.clip_backend_index,
+                                  params.vae_backend_index);
 
     if (sd_ctx == NULL) {
         printf("new_sd_ctx_t failed\n");

From 9e571a558a3db9cf0346518d81a5315f27aa6726 Mon Sep 17 00:00:00 2001
From: Cookie Crumbs <145591499+softcookiepp@users.noreply.github.com>
Date: Thu, 14 Nov 2024 11:49:35 -0800
Subject: [PATCH 4/6] Remove default params

---
 stable-diffusion.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/stable-diffusion.h b/stable-diffusion.h
index aa5b2bf49..2d6f48f21 100644
--- a/stable-diffusion.h
+++ b/stable-diffusion.h
@@ -143,9 +143,9 @@ SD_API sd_ctx_t* new_sd_ctx(const char* model_path,
                             bool keep_clip_on_cpu,
                             bool keep_control_net_cpu,
                             bool keep_vae_on_cpu,
-                            int model_backend_index = -1,
-                            int clip_backend_index = -1,
-							int vae_backend_index = -1);
+                            int model_backend_index,
+                            int clip_backend_index,
+			    int vae_backend_index);
 
 SD_API void free_sd_ctx(sd_ctx_t* sd_ctx);
 

From 282c37162ee3579ecec1fcb6d3cd926897f7adeb Mon Sep 17 00:00:00 2001
From: Cookie Crumbs <145591499+softcookiepp@users.noreply.github.com>
Date: Thu, 14 Nov 2024 11:58:25 -0800
Subject: [PATCH 5/6] Update stable-diffusion.cpp

Removed stray whitespace and changed the default Vulkan device initialization to use the last device without first initializing every preceding device.
---
 stable-diffusion.cpp | 21 ++++++++++-----------
 1 file changed, 10 insertions(+), 11 deletions(-)

diff --git a/stable-diffusion.cpp b/stable-diffusion.cpp
index a8dd1803a..aaaa1f607 100644
--- a/stable-diffusion.cpp
+++ b/stable-diffusion.cpp
@@ -123,8 +123,8 @@ class StableDiffusionGGML {
         } else if (rng_type == CUDA_RNG) {
             rng = std::make_shared<PhiloxRNG>();
         }
-    }					
-	
+    }
+
     ~StableDiffusionGGML() {
         if (clip_backend != backend) {
             ggml_backend_free(clip_backend);
@@ -171,16 +171,15 @@ class StableDiffusionGGML {
 #ifdef SD_USE_VULKAN
         LOG_DEBUG("Using Vulkan backend");
         if (model_backend_index == -1) {
-			// default behavior, last device selected
-			for (int device = 0; device < ggml_backend_vk_get_device_count(); ++device) {
-				backend = ggml_backend_vk_init(device);
-			}
-			if (!backend) {
-				LOG_WARN("Failed to initialize Vulkan backend");
-			}
-		} else {
-			backend = ggml_backend_vk_init(model_backend_index);
+		// default behavior: use the last device, but only when at least one is reported
+		int device = ggml_backend_vk_get_device_count() - 1;
+		if (device >= 0) backend = ggml_backend_vk_init(device);
+		if (!backend) {
+			LOG_WARN("Failed to initialize Vulkan backend");
 		}
+	} else {
+		backend = ggml_backend_vk_init(model_backend_index);
+	}
 #endif
 #ifdef SD_USE_SYCL
         LOG_DEBUG("Using SYCL backend");

From 2c9fe5ce2166431c1d96ddf3b97afc1630270825 Mon Sep 17 00:00:00 2001
From: Cookie Crumbs <145591499+softcookiepp@users.noreply.github.com>
Date: Wed, 20 Nov 2024 14:51:38 -0800
Subject: [PATCH 6/6] Update main.cpp

---
 examples/cli/main.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/examples/cli/main.cpp b/examples/cli/main.cpp
index 25faea88f..eb02a51be 100644
--- a/examples/cli/main.cpp
+++ b/examples/cli/main.cpp
@@ -5,7 +5,6 @@
 #include <random>
 #include <string>
 #include <vector>
-#include <stdexcept>
 
 // #include "preprocessing.hpp"
 #include "flux.hpp"