Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Wrong codegen for a nested enum match with --opt-level > 0 #15793

Closed
ghost opened this issue Jul 18, 2014 · 10 comments
Closed

Wrong codegen for a nested enum match with --opt-level > 0 #15793

ghost opened this issue Jul 18, 2014 · 10 comments

Comments

@ghost
Copy link

ghost commented Jul 18, 2014

I hit this in my static inlining PR (#15650) but it turns out it's an existing bug in master and it seems bad.

enum NestedEnum {
    First,
    Second,
    Third
}
enum Enum {
    Variant1(bool),
    Variant2(NestedEnum)
}

fn main() {
    std::os::set_exit_status(match Variant2(Third) {
        Variant1(true) => 1i,
        Variant1(false) => 2,
        Variant2(Second) => 3,
        Variant2(Third) => 4,
        Variant2(First) => 5
    });
}

This should exit with code 4 but instead exits with 5. --opt-level 0 is fine. 0.11 is fine as well. Investigating further.

@ghost
Copy link
Author

ghost commented Jul 18, 2014

Couldn't reproduce it with the NestedEnum not being a C-like enum.

@luqmana Might this be related to your recent optimizations?

@ghost
Copy link
Author

ghost commented Jul 18, 2014

IR diff between 0.11 and master:

--- 0.11.ll 2014-07-18 23:38:18.000000000 +0200
+++ master.ll   2014-07-18 23:37:50.000000000 +0200
@@ -1,20 +1,21 @@
 %"enum.Enum<[]>" = type { i8, [0 x i8], [1 x i8] }

 ; Function Attrs: inlinehint
-define internal %"enum.Enum<[]>" @_ZN8Variant120h2026663588c21502jaa4v0.0E(i1 zeroext) unnamed_addr #0 {
+define internal %"enum.Enum<[]>" @_ZN8Variant120he59273a13bfc9b2fjaaE(i1 zeroext) unnamed_addr #0 {
 entry-block:
   %__make_return_pointer = alloca %"enum.Enum<[]>"
   %1 = getelementptr inbounds %"enum.Enum<[]>"* %__make_return_pointer, i32 0, i32 0
   store i8 0, i8* %1
-  %2 = bitcast %"enum.Enum<[]>"* %__make_return_pointer to { i8, i1 }*
-  %3 = getelementptr inbounds { i8, i1 }* %2, i32 0, i32 1
-  store i1 %0, i1* %3
-  %4 = load %"enum.Enum<[]>"* %__make_return_pointer
-  ret %"enum.Enum<[]>" %4
+  %2 = bitcast %"enum.Enum<[]>"* %__make_return_pointer to { i8, i8 }*
+  %3 = getelementptr inbounds { i8, i8 }* %2, i32 0, i32 1
+  %4 = zext i1 %0 to i8
+  store i8 %4, i8* %3
+  %5 = load %"enum.Enum<[]>"* %__make_return_pointer
+  ret %"enum.Enum<[]>" %5
 }

 ; Function Attrs: inlinehint
-define internal %"enum.Enum<[]>" @_ZN8Variant220h4237def28a143758naa4v0.0E(i8) unnamed_addr #0 {
+define internal %"enum.Enum<[]>" @_ZN8Variant220he630b7b489d87862naaE(i8) unnamed_addr #0 {
 entry-block:
   %__make_return_pointer = alloca %"enum.Enum<[]>"
   %1 = getelementptr inbounds %"enum.Enum<[]>"* %__make_return_pointer, i32 0, i32 0
@@ -27,7 +28,7 @@
 }

 ; Function Attrs: uwtable
-define internal void @_ZN4main20h22638fb321d781d6raa4v0.0E() unnamed_addr #1 {
+define internal void @_ZN4main20hd74a607d35282c19raaE() unnamed_addr #1 {
 entry-block:
   %0 = alloca i64
   %1 = alloca %"enum.Enum<[]>"
@@ -35,7 +36,7 @@
   %match = alloca %"enum.Enum<[]>"
   store i8 2, i8* %2
   %3 = load i8* %2
-  %4 = call %"enum.Enum<[]>" @_ZN8Variant220h4237def28a143758naa4v0.0E(i8 %3)
+  %4 = call %"enum.Enum<[]>" @_ZN8Variant220he630b7b489d87862naaE(i8 %3)
   store %"enum.Enum<[]>" %4, %"enum.Enum<[]>"* %1
   %5 = load %"enum.Enum<[]>"* %1
   store %"enum.Enum<[]>" %5, %"enum.Enum<[]>"* %match
@@ -75,11 +76,12 @@
   ]

 match_case:                                       ; preds = %entry-block
-  %11 = bitcast %"enum.Enum<[]>"* %match to { i8, i1 }*
-  %12 = getelementptr inbounds { i8, i1 }* %11, i32 0, i32 1
-  %13 = load i1* %12
-  %14 = icmp eq i1 %13, true
-  br i1 %14, label %match_case6, label %compare_next
+  %11 = bitcast %"enum.Enum<[]>"* %match to { i8, i8 }*
+  %12 = getelementptr inbounds { i8, i8 }* %11, i32 0, i32 1
+  %13 = load i8* %12, !range !0
+  %14 = trunc i8 %13 to i1
+  %15 = icmp eq i1 %14, true
+  br i1 %15, label %match_case6, label %compare_next

 match_else5:                                      ; preds = %compare_next
   br label %case_body1
@@ -100,20 +102,20 @@
   br label %case_body3

 join:                                             ; preds = %case_body4, %case_body3, %case_body2, %case_body1, %case_body
-  %15 = load i64* %0
-  call void @_ZN2os15set_exit_status20ha1905d2c01844302Ivg7v0.11.0E(i64 %15)
+  %16 = load i64* %0
+  call void @_ZN2os15set_exit_status20h6ce0af8bbd777aa29zgE(i64 %16)
   ret void
 }

 define i64 @main(i64, i8**) unnamed_addr #2 {
 top:
-  %2 = call i64 @_ZN10lang_start20hfd2bc187bf8dda06Fme7v0.11.0E(i8* bitcast (void ()* @_ZN4main20h22638fb321d781d6raa4v0.0E to i8*), i64 %0, i8** %1)
+  %2 = call i64 @_ZN10lang_start20hea9a1813d80388d7WjeE(i8* bitcast (void ()* @_ZN4main20hd74a607d35282c19raaE to i8*), i64 %0, i8** %1)
   ret i64 %2
 }

-declare i64 @_ZN10lang_start20hfd2bc187bf8dda06Fme7v0.11.0E(i8*, i64, i8**) unnamed_addr #2
+declare i64 @_ZN10lang_start20hea9a1813d80388d7WjeE(i8*, i64, i8**) unnamed_addr #2

-declare void @_ZN2os15set_exit_status20ha1905d2c01844302Ivg7v0.11.0E(i64) unnamed_addr #2
+declare void @_ZN2os15set_exit_status20h6ce0af8bbd777aa29zgE(i64) unnamed_addr #2

 attributes #0 = { inlinehint "split-stack" }
 attributes #1 = { uwtable "split-stack" }

@ghost
Copy link
Author

ghost commented Jul 18, 2014

This (--opt-level 0):

match_else:                                       ; preds = %entry-block
  %8 = bitcast %"enum.Enum<[]>"* %match to { i8, i8 }*
  %9 = getelementptr inbounds { i8, i8 }* %8, i32 0, i32 1
  %10 = load i8* %9, !range !1
  switch i8 %10, label %match_else7 [
    i8 1, label %match_case8
    i8 2, label %match_case9
  ]

gets optimised to:

match_else:                                       ; preds = %entry-block
  %cond10 = icmp eq i8 %.fca.2.0.extract12, 1
  %. = select i1 %cond10, i64 3, i64 5
  br label %join

Not sure why. The !range on the load in the unoptimised version is correct.

@ghost
Copy link
Author

ghost commented Jul 18, 2014

I think what's key here is the interaction between Variant1(bool) using i1 for SSA values and Variant2(NestedEnum) using i8. I wonder if LLVM thinks it can truncate the latter into an i1 just because we do so for the former.

cc @dotdash

@luqmana
Copy link
Member

luqmana commented Jul 18, 2014

@jakub- I don't think it's related to that match optimization since there are no bindings in this match. All the differences in the IR seem to be related to the bool as i1 changes.

@dotdash
Copy link
Contributor

dotdash commented Jul 18, 2014

simplifycfg turns this:

match_else:                                       ; preds = %entry-block
  %8 = bitcast %"enum.Enum<[]>"* %match to { i8, i8 }*
  %9 = getelementptr inbounds { i8, i8 }* %8, i32 0, i32 1
  %10 = load i8* %9, !range !1
  switch i8 %10, label %match_else7 [
    i8 1, label %match_case8
    i8 2, label %match_case9
  ]

into this:

match_else:                                       ; preds = %entry-block
  %cond1 = icmp eq i8 %10, 1
  br i1 %cond1, label %match_case8, label %match_else7

looks wrong to me.

@dotdash
Copy link
Contributor

dotdash commented Jul 18, 2014

LLVM bug, going to submit a fix.

@dotdash
Copy link
Contributor

dotdash commented Jul 19, 2014

Submitted as http://reviews.llvm.org/D4595

@ghost
Copy link
Author

ghost commented Jul 19, 2014

@dotdash Great! Should we add it to Rust's fork of LLVM?

@dotdash
Copy link
Contributor

dotdash commented Jul 19, 2014

I expect the patch to be accepted as is, so yeah, that would be an option to get the fix in rustc early.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
2 participants