From 256abfec8a323df1b6491427ba60702738339db2 Mon Sep 17 00:00:00 2001
From: Alexey Gerenkov
Date: Thu, 28 Mar 2024 16:30:43 +0300
Subject: [PATCH] [Xtensa] Add fp16 conversion support

Close https://github.com/espressif/llvm-project/issues/91
---
 llvm/lib/Target/Xtensa/XtensaISelLowering.cpp |  13 +-
 llvm/test/CodeGen/Xtensa/fp16.ll              | 166 ++++++++++++++++++
 2 files changed, 178 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/CodeGen/Xtensa/fp16.ll

diff --git a/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp b/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp
index 2260ccc43ee428..1a865b988f3386 100644
--- a/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp
+++ b/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp
@@ -379,11 +379,22 @@ XtensaTargetLowering::XtensaTargetLowering(const TargetMachine &tm,
   // Needed so that we don't try to implement f128 constant loads using
   // a load-and-extend of a f80 constant (in cases where the constant
   // would fit in an f80).
-  for (MVT VT : MVT::fp_valuetypes())
+  for (MVT VT : MVT::fp_valuetypes()) {
+    setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand);
+    setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand);
+    setLoadExtAction(ISD::EXTLOAD, VT, MVT::f64, Expand);
     setLoadExtAction(ISD::EXTLOAD, VT, MVT::f80, Expand);
+  }
+
+  setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
+  setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
+  setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
+  setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
 
   // Floating-point truncation and stores need to be done separately.
   setTruncStoreAction(MVT::f64, MVT::f32, Expand);
+  setTruncStoreAction(MVT::f64, MVT::f16, Expand);
+  setTruncStoreAction(MVT::f32, MVT::f16, Expand);
 
   // Implement custom stack allocations
   setOperationAction(ISD::DYNAMIC_STACKALLOC, PtrVT, Custom);
diff --git a/llvm/test/CodeGen/Xtensa/fp16.ll b/llvm/test/CodeGen/Xtensa/fp16.ll
new file mode 100644
index 00000000000000..297bb71830cc48
--- /dev/null
+++ b/llvm/test/CodeGen/Xtensa/fp16.ll
@@ -0,0 +1,166 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc -mtriple=xtensa -mcpu=esp32 < %s | FileCheck --check-prefix=CHECK-ESP32 %s
+; RUN: llc -mtriple=xtensa -mcpu=esp32s3 < %s | FileCheck --check-prefix=CHECK-ESP32S3 %s
+; RUN: llc -mtriple=xtensa -mcpu=esp32s2 < %s | FileCheck --check-prefix=CHECK-ESP32S2 %s
+
+target datalayout = "e-m:e-p:32:32-v1:8:8-i64:64-i128:128-n32"
+target triple = "xtensa"
+
+@x = global i16 12902
+@y = global i16 0
+@z = common global i16 0
+
+define void @foo() nounwind {
+; CHECK-ESP32-LABEL: foo:
+; CHECK-ESP32:       entry a1, 32
+; CHECK-ESP32-NEXT:    l32r a6, .LCPI0_0
+; CHECK-ESP32-NEXT:    l16ui a10, a6, 0
+; CHECK-ESP32-NEXT:    l32r a5, .LCPI0_1
+; CHECK-ESP32-NEXT:    callx8 a5
+; CHECK-ESP32-NEXT:    mov.n a7, a10
+; CHECK-ESP32-NEXT:    l32r a8, .LCPI0_2
+; CHECK-ESP32-NEXT:    l16ui a10, a8, 0
+; CHECK-ESP32-NEXT:    callx8 a5
+; CHECK-ESP32-NEXT:    wfr f8, a10
+; CHECK-ESP32-NEXT:    wfr f9, a7
+; CHECK-ESP32-NEXT:    add.s f8, f9, f8
+; CHECK-ESP32-NEXT:    rfr a10, f8
+; CHECK-ESP32-NEXT:    l32r a8, .LCPI0_3
+; CHECK-ESP32-NEXT:    callx8 a8
+; CHECK-ESP32-NEXT:    s16i a10, a6, 0
+; CHECK-ESP32-NEXT:    retw.n
+;
+; CHECK-ESP32S3-LABEL: foo:
+; CHECK-ESP32S3:       entry a1, 32
+; CHECK-ESP32S3-NEXT:    l32r a6, .LCPI0_0
+; CHECK-ESP32S3-NEXT:    l16ui a10, a6, 0
+; CHECK-ESP32S3-NEXT:    l32r a5, .LCPI0_1
+; CHECK-ESP32S3-NEXT:    callx8 a5
+; CHECK-ESP32S3-NEXT:    mov.n a7, a10
+; CHECK-ESP32S3-NEXT:    l32r a8, .LCPI0_2
+; CHECK-ESP32S3-NEXT:    l16ui a10, a8, 0
+; CHECK-ESP32S3-NEXT:    callx8 a5
+; CHECK-ESP32S3-NEXT:    wfr f8, a10
+; CHECK-ESP32S3-NEXT:    wfr f9, a7
+; CHECK-ESP32S3-NEXT:    add.s f8, f9, f8
+; CHECK-ESP32S3-NEXT:    rfr a10, f8
+; CHECK-ESP32S3-NEXT:    l32r a8, .LCPI0_3
+; CHECK-ESP32S3-NEXT:    callx8 a8
+; CHECK-ESP32S3-NEXT:    s16i a10, a6, 0
+; CHECK-ESP32S3-NEXT:    retw.n
+;
+; CHECK-ESP32S2-LABEL: foo:
+; CHECK-ESP32S2:       entry a1, 32
+; CHECK-ESP32S2-NEXT:    l32r a6, .LCPI0_0
+; CHECK-ESP32S2-NEXT:    l16ui a10, a6, 0
+; CHECK-ESP32S2-NEXT:    l32r a5, .LCPI0_1
+; CHECK-ESP32S2-NEXT:    callx8 a5
+; CHECK-ESP32S2-NEXT:    mov.n a7, a10
+; CHECK-ESP32S2-NEXT:    l32r a8, .LCPI0_2
+; CHECK-ESP32S2-NEXT:    l16ui a10, a8, 0
+; CHECK-ESP32S2-NEXT:    callx8 a5
+; CHECK-ESP32S2-NEXT:    mov.n a11, a10
+; CHECK-ESP32S2-NEXT:    l32r a8, .LCPI0_3
+; CHECK-ESP32S2-NEXT:    mov.n a10, a7
+; CHECK-ESP32S2-NEXT:    callx8 a8
+; CHECK-ESP32S2-NEXT:    l32r a7, .LCPI0_4
+; CHECK-ESP32S2-NEXT:    callx8 a7
+; CHECK-ESP32S2-NEXT:    l32r a8, .LCPI0_5
+; CHECK-ESP32S2-NEXT:    and a10, a10, a8
+; CHECK-ESP32S2-NEXT:    callx8 a5
+; CHECK-ESP32S2-NEXT:    callx8 a7
+; CHECK-ESP32S2-NEXT:    s16i a10, a6, 0
+; CHECK-ESP32S2-NEXT:    retw.n
+entry:
+  %0 = load i16, ptr @x, align 2
+  %1 = load i16, ptr @y, align 2
+  %2 = tail call float @llvm.convert.from.fp16.f32(i16 %0)
+  %3 = tail call float @llvm.convert.from.fp16.f32(i16 %1)
+  %4 = fadd float %2, %3
+  %5 = tail call i16 @llvm.convert.to.fp16.f32(float %4)
+  store i16 %5, ptr @x, align 2
+  ret void
+}
+
+define double @test_from_fp16(i16 %in) {
+; CHECK-ESP32-LABEL: test_from_fp16:
+; CHECK-ESP32:       entry a1, 32
+; CHECK-ESP32-NEXT:    l32r a8, .LCPI1_0
+; CHECK-ESP32-NEXT:    mov.n a10, a2
+; CHECK-ESP32-NEXT:    callx8 a8
+; CHECK-ESP32-NEXT:    l32r a8, .LCPI1_1
+; CHECK-ESP32-NEXT:    callx8 a8
+; CHECK-ESP32-NEXT:    mov.n a2, a10
+; CHECK-ESP32-NEXT:    mov.n a3, a11
+; CHECK-ESP32-NEXT:    retw.n
+;
+; CHECK-ESP32S3-LABEL: test_from_fp16:
+; CHECK-ESP32S3:       entry a1, 32
+; CHECK-ESP32S3-NEXT:    l32r a8, .LCPI1_0
+; CHECK-ESP32S3-NEXT:    mov.n a10, a2
+; CHECK-ESP32S3-NEXT:    callx8 a8
+; CHECK-ESP32S3-NEXT:    l32r a8, .LCPI1_1
+; CHECK-ESP32S3-NEXT:    callx8 a8
+; CHECK-ESP32S3-NEXT:    mov.n a2, a10
+; CHECK-ESP32S3-NEXT:    mov.n a3, a11
+; CHECK-ESP32S3-NEXT:    retw.n
+;
+; CHECK-ESP32S2-LABEL: test_from_fp16:
+; CHECK-ESP32S2:       entry a1, 32
+; CHECK-ESP32S2-NEXT:    l32r a8, .LCPI1_0
+; CHECK-ESP32S2-NEXT:    and a10, a2, a8
+; CHECK-ESP32S2-NEXT:    l32r a8, .LCPI1_1
+; CHECK-ESP32S2-NEXT:    callx8 a8
+; CHECK-ESP32S2-NEXT:    l32r a8, .LCPI1_2
+; CHECK-ESP32S2-NEXT:    callx8 a8
+; CHECK-ESP32S2-NEXT:    mov.n a2, a10
+; CHECK-ESP32S2-NEXT:    mov.n a3, a11
+; CHECK-ESP32S2-NEXT:    retw.n
+  %val = call double @llvm.convert.from.fp16.f64(i16 %in)
+  ret double %val
+}
+
+define i16 @test_to_fp16(double %in) {
+; CHECK-ESP32-LABEL: test_to_fp16:
+; CHECK-ESP32:       entry a1, 32
+; CHECK-ESP32-NEXT:    l32r a8, .LCPI2_0
+; CHECK-ESP32-NEXT:    mov.n a10, a2
+; CHECK-ESP32-NEXT:    mov.n a11, a3
+; CHECK-ESP32-NEXT:    callx8 a8
+; CHECK-ESP32-NEXT:    l32r a8, .LCPI2_1
+; CHECK-ESP32-NEXT:    and a2, a10, a8
+; CHECK-ESP32-NEXT:    retw.n
+;
+; CHECK-ESP32S3-LABEL: test_to_fp16:
+; CHECK-ESP32S3:       entry a1, 32
+; CHECK-ESP32S3-NEXT:    l32r a8, .LCPI2_0
+; CHECK-ESP32S3-NEXT:    mov.n a10, a2
+; CHECK-ESP32S3-NEXT:    mov.n a11, a3
+; CHECK-ESP32S3-NEXT:    callx8 a8
+; CHECK-ESP32S3-NEXT:    l32r a8, .LCPI2_1
+; CHECK-ESP32S3-NEXT:    and a2, a10, a8
+; CHECK-ESP32S3-NEXT:    retw.n
+;
+; CHECK-ESP32S2-LABEL: test_to_fp16:
+; CHECK-ESP32S2:       entry a1, 32
+; CHECK-ESP32S2-NEXT:    l32r a8, .LCPI2_0
+; CHECK-ESP32S2-NEXT:    mov.n a10, a2
+; CHECK-ESP32S2-NEXT:    mov.n a11, a3
+; CHECK-ESP32S2-NEXT:    callx8 a8
+; CHECK-ESP32S2-NEXT:    l32r a8, .LCPI2_1
+; CHECK-ESP32S2-NEXT:    and a10, a10, a8
+; CHECK-ESP32S2-NEXT:    l32r a8, .LCPI2_2
+; CHECK-ESP32S2-NEXT:    callx8 a8
+; CHECK-ESP32S2-NEXT:    l32r a8, .LCPI2_3
+; CHECK-ESP32S2-NEXT:    callx8 a8
+; CHECK-ESP32S2-NEXT:    mov.n a2, a10
+; CHECK-ESP32S2-NEXT:    retw.n
+  %val = call i16 @llvm.convert.to.fp16.f64(double %in)
+  ret i16 %val
+}
+
+declare float @llvm.convert.from.fp16.f32(i16) nounwind readnone
+declare double @llvm.convert.from.fp16.f64(i16) nounwind readnone
+
+declare i16 @llvm.convert.to.fp16.f32(float) nounwind readnone
+declare i16 @llvm.convert.to.fp16.f64(double) nounwind readnone