diff --git a/src/brevitas/quant/fixed_point.py b/src/brevitas/quant/fixed_point.py index bb3d7893e..a29a299f3 100644 --- a/src/brevitas/quant/fixed_point.py +++ b/src/brevitas/quant/fixed_point.py @@ -108,8 +108,8 @@ class Int4WeightPerTensorFixedPointDecoupled(WeightPerTensorFloatDecoupledL2Para class Int8WeightNormL2PerChannelFixedPoint(WeightNormPerChannelFloatDecoupled): """ Experimental 8-bit narrow signed integer quantizer with learned per-channel scaling factors - and L2 weight normalization based on `Quantized Neural Networks for Low-Precision Accumulation - with Guaranteed Overflow Avoidance` by I. Colbert, A. Pappalardo, and J. Petri-Koenig + and L2 weight normalization based on `Quantized Neural Networks for Low-Precision Accumulation + with Guaranteed Overflow Avoidance` by I. Colbert, A. Pappalardo, and J. Petri-Koenig (https://arxiv.org/abs/2301.13376). The quantizer learns scaling factors in the float domain and learns vector parameter g in the log domain with the half-way rounding function. Suitable for retraining from floating-point depthwise separable weights.