Skip to content

Commit

Permalink
Floating Point to Fixed Point Converter Module (#123)
Browse files Browse the repository at this point in the history
* float to fixed converter module with documentation
  • Loading branch information
soneryaldiz authored Nov 5, 2024
1 parent a66bcda commit 5bf9681
Show file tree
Hide file tree
Showing 8 changed files with 260 additions and 1 deletion.
8 changes: 8 additions & 0 deletions doc/components/fixed_point.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,11 @@ The `FixedPoint` type is an extension of `Logic` with additional attributes (sig
## FixedToFloat

This component converts a fixed-point signal to a floating point signal specified by exponent and mantissa width. The output is rounded to nearest even when applicable and set to infinity if the input exceeds the representable range.

## FloatToFixed

This component converts a floating-point signal to a signed fixed-point signal. Infinities and NaN's are not supported. The integer and fraction widths are auto-calculated to achieve lossless conversion.

## Float8ToFixed

This component converts an 8-bit floating-point (FP8) representation (E5M2 or E4M3) to a signed fixed-point representation. The same hardware is used for both FP8 formats. Therefore, both input and output are of type `Logic` and can be cast from/to floating point/fixed point by the producer/consumer based on the selected `mode`. Infinities and NaN's are not supported. The output width is 33 to accommodate E5M2 without loss.
1 change: 1 addition & 0 deletions lib/src/arithmetic/arithmetic.dart
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ export 'carry_save_mutiplier.dart';
export 'compound_adder.dart';
export 'divider.dart';
export 'fixed_to_float.dart';
export 'float_to_fixed.dart';
export 'floating_point/floating_point.dart';
export 'multiplier.dart';
export 'multiplier_lib.dart';
Expand Down
118 changes: 118 additions & 0 deletions lib/src/arithmetic/float_to_fixed.dart
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
// Copyright (C) 2024 Intel Corporation
// SPDX-License-Identifier: BSD-3-Clause
//
// float_to_fixed.dart
// Transform floating point input signals to fixed point signals.
//
// 2024 November 1
// Author: Soner Yaldiz <[email protected]>

import 'package:rohd/rohd.dart';
import 'package:rohd_hcl/rohd_hcl.dart';

/// [FloatToFixed] converts a floating point input to a signed
/// fixed-point output following Q notation (Qm.n format) as introduced by
/// [Texas Instruments](https://www.ti.com/lit/ug/spru565b/spru565b.pdf).
///
/// Infinities and NaN's are not supported. Conversion is lossless.
/// The output is in two's complement and in Qm.n format where:
/// - m = e_max - bias + 1
/// - n = mantissa + bias - 1
class FloatToFixed extends Module {
  /// Width of output integer part.
  late final int m;

  /// Width of output fractional part.
  late final int n;

  /// Internal representation of the output port.
  late final FixedPoint _fixed = FixedPoint(signed: true, m: m, n: n);

  /// Output fixed point port.
  late final FixedPoint fixed = _fixed.clone()..gets(output('fixed'));

  /// Builds a converter for [float].
  ///
  /// [m] and [n] are derived from the exponent and mantissa widths of
  /// [float]; the `fixed` output port is `m + n + 1` bits wide.
  FloatToFixed(FloatingPoint float, {super.name = 'FloatToFixed'}) {
    float = float.clone()..gets(addInput('float', float, width: float.width));

    final bias = FloatingPointValue.computeBias(float.exponent.width);
    // E4M3 expands the max exponent by 1.
    // NOTE(review): the class documentation states m = e_max - bias + 1,
    // while this computes m = bias (bias + 1 for E4M3). Confirm that the two
    // agree for every supported format.
    m = (float.exponent.width == 4 && float.mantissa.width == 3)
        ? bias + 1
        : bias;
    n = bias + float.mantissa.width - 1;
    final outputWidth = m + n + 1;

    // Leading bit placed above the mantissa: set only for normal inputs.
    final jBit = Logic(name: 'jBit')..gets(float.isNormal());

    // Normal inputs are shifted by (exponent - 1); all others are not shifted.
    final shift = Logic(name: 'shift', width: float.exponent.width)
      ..gets(
          mux(jBit, float.exponent - 1, Const(0, width: float.exponent.width)));

    // Magnitude: {jBit, mantissa} zero-extended to the output width, then
    // shifted left into position.
    final number = Logic(name: 'number', width: outputWidth)
      ..gets([
            Const(0, width: outputWidth - float.mantissa.width - 1),
            jBit,
            float.mantissa
          ].swizzle() <<
          shift);

    // Two's complement negation when the sign bit is set.
    _fixed <= mux(float.sign, ~number + 1, number);
    addOutput('fixed', width: outputWidth) <= _fixed;
  }
}

/// [Float8ToFixed] converts an 8-bit floating point (FP8) input
/// to a signed fixed-point output following Q notation (Qm.n) as introduced by
/// [Texas Instruments](https://www.ti.com/lit/ug/spru565b/spru565b.pdf).
///
/// FP8 input must follow E4M3 or E5M2 as described in
/// [FP8 formats for deep learning](https://arxiv.org/pdf/2209.05433).
/// This component offers re-using the same hardware for both FP8 formats.
/// Infinities and NaN's are not supported.
///
/// The output is a 33-bit [Logic] in two's complement.
/// It can be cast to a [FixedPoint] by the consumer based on the mode.
///
/// If `mode` is true, the input is treated as E4M3 and converted to Q9.9:
/// - `fixed[17:9]` contains the integer part
/// - `fixed[8:0]` contains the fractional part
/// - the full 33-bit output can be read as Q23.9 through [q23p9]
///
/// Otherwise, the input is treated as E5M2 and converted to Q16.16:
/// - `fixed[31:16]` contains the integer part
/// - `fixed[15:0]` contains the fractional part
/// - the full 33-bit output can be read through [q16p16]
class Float8ToFixed extends Module {
  /// Output port [fixed]
  Logic get fixed => output('fixed');

  /// [fixed] wrapped as a signed Q23.9 [FixedPoint] (E4M3 mode).
  FixedPoint get q23p9 => FixedPoint.of(fixed, signed: true, m: 23, n: 9);

  /// [fixed] wrapped as a signed Q16.16 [FixedPoint] (E5M2 mode).
  FixedPoint get q16p16 => FixedPoint.of(fixed, signed: true, m: 16, n: 16);

  /// Builds a converter for the 8-bit [float] input.
  ///
  /// [mode] selects the input format: E4M3 when high, E5M2 when low.
  ///
  /// Throws a [RohdHclException] if [float] is not 8 bits wide.
  Float8ToFixed(Logic float, Logic mode, {super.name = 'Float8ToFixed'}) {
    // Validate before any port is created so that an invalid input does not
    // leave a partially constructed module behind.
    if (float.width != 8) {
      throw RohdHclException('Input width must be 8.');
    }

    float = addInput('float', float, width: float.width);
    mode = addInput('mode', mode);
    addOutput('fixed', width: 33);

    // Exponent field widened to 5 bits: E4M3 uses float[6:3] with a leading
    // zero, E5M2 uses float[6:2] directly.
    final exponent = Logic(name: 'exponent', width: 5)
      ..gets(mux(
          mode, [Const(0), float.slice(6, 3)].swizzle(), float.slice(6, 2)));

    // Leading bit: set whenever the exponent field is non-zero.
    final jBit = Logic(name: 'jBit')..gets(exponent.or());

    // 4-bit significand: {jBit, float[2:0]} for E4M3,
    // {0, jBit, float[1:0]} for E5M2.
    final mantissa = Logic(name: 'mantissa', width: 4)
      ..gets(mux(mode, [jBit, float.slice(2, 0)].swizzle(),
          [Const(0), jBit, float.slice(1, 0)].swizzle()));

    // Shift by (exponent - 1) when the exponent is non-zero, otherwise by 0.
    final shift = Logic(name: 'shift', width: exponent.width)
      ..gets(mux(jBit, exponent - 1, Const(0, width: exponent.width)));

    // Magnitude: the significand zero-extended to 33 bits and shifted left.
    final number = Logic(name: 'number', width: 33)
      ..gets([Const(0, width: 29), mantissa].swizzle() << shift);

    // Two's complement negation when the sign bit (MSB of the input) is set.
    fixed <= mux(float[float.width - 1], ~number + 1, number);
  }
}
4 changes: 3 additions & 1 deletion lib/src/component_config/components/component_registry.dart
Original file line number Diff line number Diff line change
Expand Up @@ -30,5 +30,7 @@ List<Configurator> get componentRegistry => [
CompressionTreeMultiplierConfigurator(),
ExtremaConfigurator(),
CompoundAdderConfigurator(),
FixedToFloatConfigurator()
FixedToFloatConfigurator(),
FloatToFixedConfigurator(),
Float8ToFixedConfigurator()
];
2 changes: 2 additions & 0 deletions lib/src/component_config/components/components.dart
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ export 'config_extrema.dart';
export 'config_fifo.dart';
export 'config_find.dart';
export 'config_fixed_to_float.dart';
export 'config_float8_to_fixed.dart';
export 'config_float_to_fixed.dart';
export 'config_floating_point_adder_round.dart';
export 'config_one_hot.dart';
export 'config_parallel_prefix_adder.dart';
Expand Down
25 changes: 25 additions & 0 deletions lib/src/component_config/components/config_float8_to_fixed.dart
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
// Copyright (C) 2024 Intel Corporation
// SPDX-License-Identifier: BSD-3-Clause
//
// config_float8_to_fixed.dart
// Configurator for a Float8ToFixed converter.
//
// 2024 November 1
// Author: Soner Yaldiz <[email protected]>

import 'dart:collection';

import 'package:rohd/rohd.dart';
import 'package:rohd_hcl/rohd_hcl.dart';

/// Exposes [Float8ToFixed] through the [Configurator] interface.
///
/// This configurator declares no knobs, so [knobs] is an empty map.
class Float8ToFixedConfigurator extends Configurator {
  @override
  final String name = 'FP8 To Fixed Converter';

  @override
  late final Map<String, ConfigKnob<dynamic>> knobs = UnmodifiableMapView({});

  /// Creates a [Float8ToFixed] driven by a fresh 8-bit input signal and a
  /// fresh single-bit mode signal.
  @override
  Module createModule() {
    final float = Logic(width: 8);
    final mode = Logic();
    return Float8ToFixed(float, mode);
  }
}
36 changes: 36 additions & 0 deletions lib/src/component_config/components/config_float_to_fixed.dart
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
// Copyright (C) 2024 Intel Corporation
// SPDX-License-Identifier: BSD-3-Clause
//
// config_float_to_fixed.dart
// Configurator for a FloatToFixed converter.
//
// 2024 November 1
// Author: Soner Yaldiz <[email protected]>

import 'dart:collection';

import 'package:rohd/rohd.dart';
import 'package:rohd_hcl/rohd_hcl.dart';

/// Exposes [FloatToFixed] through the [Configurator] interface.
///
/// The input floating point format is selected with [exponentWidthKnob] and
/// [mantissaWidthKnob].
class FloatToFixedConfigurator extends Configurator {
  /// Width of exponent, must be greater than 0.
  final IntConfigKnob exponentWidthKnob = IntConfigKnob(value: 8);

  /// Width of mantissa, must be greater than 0.
  final IntConfigKnob mantissaWidthKnob = IntConfigKnob(value: 23);

  @override
  final String name = 'Float To Fixed Converter';

  @override
  late final Map<String, ConfigKnob<dynamic>> knobs = UnmodifiableMapView({
    'Input exponent width': exponentWidthKnob,
    'Input mantissa width': mantissaWidthKnob,
  });

  /// Creates a [FloatToFixed] whose input uses the widths currently held by
  /// the two knobs.
  @override
  Module createModule() {
    final float = FloatingPoint(
      exponentWidth: exponentWidthKnob.value,
      mantissaWidth: mantissaWidthKnob.value,
    );
    return FloatToFixed(float);
  }
}
67 changes: 67 additions & 0 deletions test/arithmetic/float_to_fixed_test.dart
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
// Copyright (C) 2024 Intel Corporation
// SPDX-License-Identifier: BSD-3-Clause
//
// float_to_fixed_test.dart
// Test floating point to fixed point conversion.
//
// 2024 November 1
// Author: Soner Yaldiz <[email protected]>

import 'dart:math';
import 'package:rohd/rohd.dart';
import 'package:rohd_hcl/rohd_hcl.dart';
import 'package:test/test.dart';

void main() async {
  // Sweeps all 256 E5M2 encodings through FloatToFixed and compares the
  // output bits against the value computed by FixedPointValue.ofDouble.
  test('E5M2 to Q16.16 exhaustive', () async {
    final float = FloatingPoint(exponentWidth: 5, mantissaWidth: 2);
    final dut = FloatToFixed(float);
    await dut.build();

    for (var encoding = 0; encoding < pow(2, 8); encoding++) {
      final input = FloatingPointValue.ofLogicValue(
          5, 2, LogicValue.ofInt(encoding, float.width));
      if (input.isNaN()) {
        continue;
      }

      float.put(input);
      final actual = dut.fixed;
      final expected = FixedPointValue.ofDouble(input.toDouble(),
          signed: true, m: dut.m, n: dut.n);
      expect(actual.value.bitString, expected.value.bitString);
    }
  });

  // Sweeps all 256 encodings through Float8ToFixed in each mode and checks
  // both the raw output bits and the matching FixedPoint getter.
  test('FP8toINT: exhaustive', () async {
    final float = Logic(width: 8);
    final mode = Logic();
    final dut = Float8ToFixed(float, mode);
    await dut.build();

    // E4M3
    mode.put(1);
    for (var encoding = 0; encoding < pow(2, 8); encoding++) {
      final input = FloatingPointValue.ofLogicValue(
          4, 3, LogicValue.ofInt(encoding, float.width));
      if (input.isNaN()) {
        continue;
      }

      float.put(input.value);
      final expected = FixedPointValue.ofDouble(input.toDouble(),
          signed: true, m: 23, n: 9);
      expect(dut.fixed.value.bitString, expected.value.bitString);
      expect(dut.q23p9.value, expected.value);
    }

    // E5M2
    mode.put(0);
    for (var encoding = 0; encoding < pow(2, 8); encoding++) {
      final input = FloatingPointValue.ofLogicValue(
          5, 2, LogicValue.ofInt(encoding, float.width));
      if (input.isNaN()) {
        continue;
      }

      float.put(input.value);
      final expected = FixedPointValue.ofDouble(input.toDouble(),
          signed: true, m: 16, n: 16);
      expect(dut.fixed.value.bitString, expected.value.bitString);
      expect(dut.q16p16.value, expected.value);
    }
  });
}

0 comments on commit 5bf9681

Please sign in to comment.