Fix loras having a weak effect when applied on fp8.
comfyanonymous committed Aug 17, 2024
1 parent 14af129 commit bb222ce
Showing 2 changed files with 83 additions and 16 deletions.
comfy/float.py (51 additions, 0 deletions, new file)
@@ -0,0 +1,51 @@
import torch

#Not 100% sure about this
def manual_stochastic_round_to_float8(x, dtype):
    if dtype == torch.float8_e4m3fn:
        EXPONENT_BITS, MANTISSA_BITS, EXPONENT_BIAS = 4, 3, 7
    elif dtype == torch.float8_e5m2:
        EXPONENT_BITS, MANTISSA_BITS, EXPONENT_BIAS = 5, 2, 15
    else:
        raise ValueError("Unsupported dtype")

    sign = torch.sign(x)
    abs_x = x.abs()

    # Combine exponent calculation and clamping
    exponent = torch.clamp(
        torch.floor(torch.log2(abs_x)).to(torch.int32) + EXPONENT_BIAS,
        0, 2**EXPONENT_BITS - 1
    )

    # Combine mantissa calculation and rounding
    mantissa = abs_x / (2.0 ** (exponent - EXPONENT_BIAS)) - 1.0
    mantissa_scaled = mantissa * (2**MANTISSA_BITS)
    mantissa_floor = mantissa_scaled.floor()
    mantissa = torch.where(
        torch.rand_like(mantissa_scaled) < (mantissa_scaled - mantissa_floor),
        (mantissa_floor + 1) / (2**MANTISSA_BITS),
        mantissa_floor / (2**MANTISSA_BITS)
    )

    # Combine final result calculation
    result = sign * (2.0 ** (exponent - EXPONENT_BIAS)) * (1.0 + mantissa)

    # Handle zero case
    result = torch.where(abs_x == 0, torch.zeros_like(result), result)

    return result.to(dtype=dtype)



def stochastic_rounding(value, dtype):
    if dtype == torch.float32:
        return value.to(dtype=torch.float32)
    if dtype == torch.float16:
        return value.to(dtype=torch.float16)
    if dtype == torch.bfloat16:
        return value.to(dtype=torch.bfloat16)
    if dtype == torch.float8_e4m3fn or dtype == torch.float8_e5m2:
        return manual_stochastic_round_to_float8(value, dtype)

    return value.to(dtype=dtype)
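
For context (not part of the commit): a minimal sketch of why stochastic rounding fixes the weak-LoRA symptom. In float8_e4m3fn the spacing between representable values in [1, 2) is 0.125, so a small LoRA delta cast back with ordinary nearest rounding is simply dropped, while stochastic rounding preserves it in expectation. The sketch assumes a PyTorch build with float8 dtypes (2.1 or newer) and that the module above is importable as comfy.float; otherwise copy the two functions into a local file.

import torch
import comfy.float  # the file added in this commit

torch.manual_seed(0)
base = torch.full((100000,), 1.0)    # exactly representable in fp8
delta = torch.full((100000,), 0.01)  # LoRA contribution, far below the 0.125 step around 1.0
patched = base + delta

# Old behavior: nearest rounding snaps every patched weight back to 1.0.
naive = patched.to(torch.float8_e4m3fn).to(torch.float32)
print((naive - base).mean())    # ~0.0, the LoRA effect vanishes

# New behavior: each weight lands on 1.0 or 1.125 at random, so the
# average applied delta stays close to the intended 0.01.
rounded = comfy.float.stochastic_rounding(patched, torch.float8_e4m3fn).to(torch.float32)
print((rounded - base).mean())  # ~0.01 in expectation

The exact numbers vary from run to run because the rounding is random, but the mean of the stochastically rounded result tracks the unquantized value far better than the plain cast.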
comfy/model_patcher.py (32 additions, 16 deletions)
@@ -22,8 +22,10 @@
 import logging
 import uuid
 import collections
+import math
 
 import comfy.utils
+import comfy.float
 import comfy.model_management
 from comfy.types import UnetWrapperFunction
 
@@ -327,7 +329,8 @@ def patch_weight_to_device(self, key, device_to=None, inplace_update=False):
             temp_weight = comfy.model_management.cast_to_device(weight, device_to, torch.float32, copy=True)
         else:
             temp_weight = weight.to(torch.float32, copy=True)
-        out_weight = self.calculate_weight(self.patches[key], temp_weight, key).to(weight.dtype)
+        out_weight = self.calculate_weight(self.patches[key], temp_weight, key)
+        out_weight = comfy.float.stochastic_rounding(out_weight, weight.dtype)
         if inplace_update:
             comfy.utils.copy_to_param(self.model, key, out_weight)
         else:
@@ -341,12 +344,16 @@ def patch_model(self, device_to=None, patch_weights=True):
 
         if patch_weights:
             model_sd = self.model_state_dict()
+            keys_sort = []
             for key in self.patches:
                 if key not in model_sd:
                     logging.warning("could not patch. key doesn't exist in model: {}".format(key))
                     continue
+                keys_sort.append((math.prod(model_sd[key].shape), key))
 
-                self.patch_weight_to_device(key, device_to)
+            keys_sort.sort(reverse=True)
+            for ks in keys_sort:
+                self.patch_weight_to_device(ks[1], device_to)
 
             if device_to is not None:
                 self.model.to(device_to)
Expand All @@ -359,6 +366,7 @@ def lowvram_load(self, device_to=None, lowvram_model_memory=0, force_patch_weigh
mem_counter = 0
patch_counter = 0
lowvram_counter = 0
load_completely = []
for n, m in self.model.named_modules():
lowvram_weight = False

@@ -395,20 +403,28 @@ def lowvram_load(self, device_to=None, lowvram_model_memory=0, force_patch_weigh
                         wipe_lowvram_weight(m)
 
                 if hasattr(m, "weight"):
-                    mem_counter += comfy.model_management.module_size(m)
-                    param = list(m.parameters())
-                    if len(param) > 0:
-                        weight = param[0]
-                        if weight.device == device_to:
-                            continue
-
-                        weight_to = None
-                        if full_load:#TODO
-                            weight_to = device_to
-                        self.patch_weight_to_device(weight_key, device_to=weight_to) #TODO: speed this up without OOM
-                        self.patch_weight_to_device(bias_key, device_to=weight_to)
-                        m.to(device_to)
-                        logging.debug("lowvram: loaded module regularly {} {}".format(n, m))
+                    mem_used = comfy.model_management.module_size(m)
+                    mem_counter += mem_used
+                    load_completely.append((mem_used, n, m))
+
+        load_completely.sort(reverse=True)
+        for x in load_completely:
+            n = x[1]
+            m = x[2]
+            weight_key = "{}.weight".format(n)
+            bias_key = "{}.bias".format(n)
+            param = list(m.parameters())
+            if len(param) > 0:
+                weight = param[0]
+                if weight.device == device_to:
+                    continue
+
+            self.patch_weight_to_device(weight_key, device_to=device_to)
+            self.patch_weight_to_device(bias_key, device_to=device_to)
+            logging.debug("lowvram: loaded module regularly {} {}".format(n, m))
+
+        for x in load_completely:
+            x[2].to(device_to)
 
         if lowvram_counter > 0:
             logging.info("loaded partially {} {} {}".format(lowvram_model_memory / (1024 * 1024), mem_counter / (1024 * 1024), patch_counter))
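
Also for context (not part of the commit): the second change above reorders work so that the largest tensors are handled first, both when patching (keys_sort in patch_model) and when deciding which modules to load completely (load_completely in lowvram_load). A tiny illustration of the pattern, with made-up layer names and shapes standing in for self.model_state_dict() and self.patches:

import math
import torch

# Hypothetical stand-ins for the real state dict and patch dict.
model_sd = {
    "blocks.0.attn.qkv.weight": torch.empty(3072, 1024),
    "blocks.0.mlp.fc1.weight": torch.empty(4096, 1024),
    "blocks.0.attn.proj.bias": torch.empty(1024),
}
patches = dict.fromkeys(model_sd)

keys_sort = []
for key in patches:
    if key not in model_sd:
        continue
    keys_sort.append((math.prod(model_sd[key].shape), key))

keys_sort.sort(reverse=True)  # largest parameter count first
for numel, key in keys_sort:
    print(numel, key)
# 4194304 blocks.0.mlp.fc1.weight
# 3145728 blocks.0.attn.qkv.weight
# 1024 blocks.0.attn.proj.bias

Sorting the (size, key) tuples in reverse order means the biggest allocations and casts happen while free memory is at its maximum, presumably to make out-of-memory failures during patching less likely; the commit itself does not state the rationale.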

1 comment on commit bb222ce

@rabidcopy

👍