feat: add flux support (#356)

* add flux support

* avoid build failures in non-CUDA environments

* fix schnell support

* add k quants support

* add support for applying lora to quantized tensors

* add inplace conversion support for f8_e4m3 (#359)

in the same way it is done for bf16
like how bf16 converts losslessly to fp32,
f8_e4m3 converts losslessly to fp16

* add xlabs flux comfy converted lora support

* update docs

---------

Co-authored-by: Erik Scholz <Green-Sky@users.noreply.github.com>
This commit is contained in:
leejet
2024-08-24 14:29:52 +08:00
committed by GitHub
parent 697d000f49
commit 64d231f384
25 changed files with 1886 additions and 172 deletions

View File

@@ -455,9 +455,9 @@ protected:
 public:
     AutoencodingEngine(bool decode_only     = true,
                        bool use_video_decoder = false,
-                       SDVersion version      = VERSION_1_x)
+                       SDVersion version      = VERSION_SD1)
         : decode_only(decode_only), use_video_decoder(use_video_decoder) {
-        if (version == VERSION_3_2B) {
+        if (version == VERSION_SD3_2B || version == VERSION_FLUX_DEV || version == VERSION_FLUX_SCHNELL) {
             dd_config.z_channels = 16;
             use_quant            = false;
         }
@@ -527,7 +527,7 @@ struct AutoEncoderKL : public GGMLRunner {
                   ggml_type wtype,
                   bool decode_only       = false,
                   bool use_video_decoder = false,
-                  SDVersion version      = VERSION_1_x)
+                  SDVersion version      = VERSION_SD1)
         : decode_only(decode_only), ae(decode_only, use_video_decoder, version), GGMLRunner(backend, wtype) {
         ae.init(params_ctx, wtype);
     }