feat: add flux support (#356)

* add flux support

* avoid build failures in non-CUDA environments

* fix schnell support

* add k quants support

* add support for applying lora to quantized tensors

* add inplace conversion support for f8_e4m3 (#359)

in the same way it is done for bf16
like how bf16 converts losslessly to fp32,
f8_e4m3 converts losslessly to fp16

* add xlabs flux comfy converted lora support

* update docs

---------

Co-authored-by: Erik Scholz <Green-Sky@users.noreply.github.com>
This commit is contained in:
leejet
2024-08-24 14:29:52 +08:00
committed by GitHub
parent 697d000f49
commit 64d231f384
25 changed files with 1886 additions and 172 deletions

View File

@@ -455,9 +455,9 @@ protected:
 public:
     AutoencodingEngine(bool decode_only     = true,
                        bool use_video_decoder = false,
-                       SDVersion version      = VERSION_1_x)
+                       SDVersion version      = VERSION_SD1)
         : decode_only(decode_only), use_video_decoder(use_video_decoder) {
-        if (version == VERSION_3_2B) {
+        if (version == VERSION_SD3_2B || version == VERSION_FLUX_DEV || version == VERSION_FLUX_SCHNELL) {
             dd_config.z_channels = 16;
             use_quant            = false;
         }
@@ -527,7 +527,7 @@ struct AutoEncoderKL : public GGMLRunner {
                   ggml_type wtype,
                   bool decode_only       = false,
                   bool use_video_decoder = false,
-                  SDVersion version      = VERSION_1_x)
+                  SDVersion version      = VERSION_SD1)
         : decode_only(decode_only), ae(decode_only, use_video_decoder, version), GGMLRunner(backend, wtype) {
         ae.init(params_ctx, wtype);
     }