From 86577879c843c6372de6f3a9cb870ed04ded161b Mon Sep 17 00:00:00 2001
From: Xavier Leroy <xavier.leroy@college-de-france.fr>
Date: Mon, 12 Aug 2024 08:25:49 +0200
Subject: [PATCH 1/3] Update test suite

---
 test | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test b/test
index 02fc1f752..34ba42654 160000
--- a/test
+++ b/test
@@ -1 +1 @@
-Subproject commit 02fc1f752101db18e8ac0b103a0e1222a7f94702
+Subproject commit 34ba42654dd841f043698b1ee0e9e1700beb33e9

From 08851d2125ad39f29835cbbe1e2aa3d3608486be Mon Sep 17 00:00:00 2001
From: Xavier Leroy <xavier.leroy@college-de-france.fr>
Date: Mon, 12 Aug 2024 08:26:19 +0200
Subject: [PATCH 2/3] Stronger result about the nonaliasing test `pdisjoint`

The two abstract pointers being compared can come from different block
classifications, as long as the two classifications agree on globals
and on the stack pointer.
---
 backend/ValueDomain.v | 123 ++++++++++++++++++++++++++++++------------
 1 file changed, 88 insertions(+), 35 deletions(-)

diff --git a/backend/ValueDomain.v b/backend/ValueDomain.v
index 98d6736b8..05bf047d4 100644
--- a/backend/ValueDomain.v
+++ b/backend/ValueDomain.v
@@ -476,41 +476,6 @@ Proof.
   try (destruct (peq id id0));  try constructor; try (apply cmp_different_blocks_none).
 Qed.
 
-Definition pdisjoint (p1: aptr) (sz1: Z) (p2: aptr) (sz2: Z) : bool :=
-  match p1, p2 with
-  | Pbot, _ => true
-  | _, Pbot => true
-  | Gl id1 ofs1, Gl id2 ofs2 =>
-      if peq id1 id2
-      then zle (Ptrofs.unsigned ofs1 + sz1) (Ptrofs.unsigned ofs2)
-           || zle (Ptrofs.unsigned ofs2 + sz2) (Ptrofs.unsigned ofs1)
-      else true
-  | Gl id1 ofs1, Glo id2 => negb(peq id1 id2)
-  | Glo id1, Gl id2 ofs2 => negb(peq id1 id2)
-  | Glo id1, Glo id2 => negb(peq id1 id2)
-  | Stk ofs1, Stk ofs2 =>
-      zle (Ptrofs.unsigned ofs1 + sz1) (Ptrofs.unsigned ofs2)
-      || zle (Ptrofs.unsigned ofs2 + sz2) (Ptrofs.unsigned ofs1)
-  | (Gl _ _ | Glo _ | Glob | Nonstack), (Stk _ | Stack) => true
-  | (Stk _ | Stack), (Gl _ _ | Glo _ | Glob | Nonstack) => true
-  | _, _ => false
-  end.
-
-Lemma pdisjoint_sound:
-  forall sz1 b1 ofs1 p1 sz2 b2 ofs2 p2,
-  pdisjoint p1 sz1 p2 sz2 = true ->
-  pmatch b1 ofs1 p1 -> pmatch b2 ofs2 p2 ->
-  b1 <> b2 \/ Ptrofs.unsigned ofs1 + sz1 <= Ptrofs.unsigned ofs2 \/ Ptrofs.unsigned ofs2 + sz2 <= Ptrofs.unsigned ofs1.
-Proof.
-  intros. inv H0; inv H1; simpl in H; try discriminate; try (left; congruence).
-- destruct (peq id id0). subst id0. destruct (orb_true_elim _ _ H); InvBooleans; auto.
-  left; congruence.
-- destruct (peq id id0); try discriminate. left; congruence.
-- destruct (peq id id0); try discriminate. left; congruence.
-- destruct (peq id id0); try discriminate. left; congruence.
-- destruct (orb_true_elim _ _ H); InvBooleans; auto.
-Qed.
-
 (** * Abstracting values *)
 
 Inductive aval : Type :=
@@ -4496,6 +4461,94 @@ Qed.
 
 End MATCH.
 
+(** * Nonaliasing tests. *)
+
+(** [pdisjoint p1 sz1 p2 sz2] returns [true] if there can be no overlap
+    between a block of size [sz1] in the region characterized by [p1]
+    and a block of size [sz2] in the region characterized by [p2]. *)
+
+Definition pdisjoint (p1: aptr) (sz1: Z) (p2: aptr) (sz2: Z) : bool :=
+  match p1, p2 with
+  | Pbot, _ => true
+  | _, Pbot => true
+  | Gl id1 ofs1, Gl id2 ofs2 =>
+      if peq id1 id2
+      then zle (Ptrofs.unsigned ofs1 + sz1) (Ptrofs.unsigned ofs2)
+           || zle (Ptrofs.unsigned ofs2 + sz2) (Ptrofs.unsigned ofs1)
+      else true
+  | Gl id1 ofs1, Glo id2 => negb(peq id1 id2)
+  | Glo id1, Gl id2 ofs2 => negb(peq id1 id2)
+  | Glo id1, Glo id2 => negb(peq id1 id2)
+  | Stk ofs1, Stk ofs2 =>
+      zle (Ptrofs.unsigned ofs1 + sz1) (Ptrofs.unsigned ofs2)
+      || zle (Ptrofs.unsigned ofs2 + sz2) (Ptrofs.unsigned ofs1)
+  | (Gl _ _ | Glo _ | Glob | Nonstack), (Stk _ | Stack) => true
+  | (Stk _ | Stack), (Gl _ _ | Glo _ | Glob | Nonstack) => true
+  | _, _ => false
+  end.
+
+Lemma pdisjoint_sound:
+  forall (bc: block_classification) sz1 b1 ofs1 p1 sz2 b2 ofs2 p2,
+  pdisjoint p1 sz1 p2 sz2 = true ->
+  pmatch bc b1 ofs1 p1 -> pmatch bc b2 ofs2 p2 ->
+  b1 <> b2 \/ Ptrofs.unsigned ofs1 + sz1 <= Ptrofs.unsigned ofs2 \/ Ptrofs.unsigned ofs2 + sz2 <= Ptrofs.unsigned ofs1.
+Proof.
+  intros. inv H0; inv H1; simpl in H; try discriminate; try (left; congruence).
+- destruct (peq id id0). subst id0. destruct (orb_true_elim _ _ H); InvBooleans; auto.
+  left; congruence.
+- destruct (peq id id0); try discriminate. left; congruence.
+- destruct (peq id id0); try discriminate. left; congruence.
+- destruct (peq id id0); try discriminate. left; congruence.
+- destruct (orb_true_elim _ _ H); InvBooleans; auto.
+Qed.
+
+(** This is a stronger property of [pdisjoint], guaranteeing nonaliasing
+    even if the two pointers are considered in different, but compatible
+    block classifications. *)
+
+Lemma pdisjoint_sound_strong:
+  forall sz1 b1 ofs1 bc1 p1 sz2 b2 ofs2 bc2 p2 ge sp,
+  pdisjoint p1 sz1 p2 sz2 = true ->
+  pmatch bc1 b1 ofs1 p1 -> pmatch bc2 b2 ofs2 p2 ->
+  genv_match bc1 ge -> bc1 sp = BCstack ->
+  genv_match bc2 ge -> bc2 sp = BCstack ->
+  b1 <> b2 \/ Ptrofs.unsigned ofs1 + sz1 <= Ptrofs.unsigned ofs2 \/ Ptrofs.unsigned ofs2 + sz2 <= Ptrofs.unsigned ofs1.
+Proof.
+  assert (GLOB_GLOB: forall (bc1 bc2: block_classification) ge b1 b2 id1 id2,
+           genv_match bc1 ge -> genv_match bc2 ge ->
+           bc1 b1 = BCglob id1 -> bc2 b2 = BCglob id2 ->
+           id1 <> id2 -> b1 <> b2).
+  { intros until id2; intros GE1 GE2 EQ1 EQ2 DIFF.
+    apply GE1 in EQ1; apply GE2 in EQ2.
+    apply Genv.find_invert_symbol in EQ1, EQ2.
+    congruence. }
+  assert (GLOB_STACK: forall (bc1 bc2: block_classification) ge sp b1 b2 id,
+           genv_match bc1 ge -> bc1 sp = BCstack -> bc2 sp = BCstack ->
+           bc1 b1 = BCglob id -> bc2 b2 = BCstack ->
+           b1 <> b2).
+  { intros until id; intros GE1 SP1 SP2 EQ1 EQ2.
+    apply GE1 in EQ1.
+    assert (bc1 b1 <> BCstack) by (apply GE1; eapply (Senv.find_symbol_below ge); eauto).
+    assert (b2 = sp) by (eapply bc2.(bc_stack); eauto).
+    congruence. }
+  assert (STACK_OTHER: forall (bc1 bc2: block_classification) sp b1 b2,
+           bc1 sp = BCstack -> bc2 sp = BCstack ->
+           bc1 b1 = BCstack -> bc2 b2 <> BCstack ->
+           b1 <> b2).
+  { intros until b2; intros SP1 SP2 EQ1 EQ2.
+    assert (b1 = sp) by (eapply bc1.(bc_stack); eauto).
+    congruence. }
+  intros until sp; intros DISJ PM1 PM2 GE1 SP1 GE2 SP2.
+  inv PM1; inv PM2; simpl in DISJ; try discriminate; eauto using not_eq_sym.
+  - destruct (peq id id0).
+    + subst id0. destruct (orb_true_elim _ _ DISJ); InvBooleans; auto.
+    + eauto.
+  - destruct (peq id id0); discriminate || eauto.
+  - destruct (peq id id0); discriminate || eauto.
+  - destruct (peq id id0); discriminate || eauto.
+  - destruct (orb_true_elim _ _ DISJ); InvBooleans; auto.
+Qed.
+
 (** * Monotonicity properties when the block classification changes. *)
 
 Lemma genv_match_exten:

From 2458d07ef8b68bb3c0612d569fdcc9853d3b2057 Mon Sep 17 00:00:00 2001
From: Xavier Leroy <xavier.leroy@college-de-france.fr>
Date: Mon, 12 Aug 2024 08:27:41 +0200
Subject: [PATCH 3/3] CSE: remember the abstract pointers in Load equations

This is simpler than recomputing abstract pointers in the `kill_loads`
functions.  This is also more precise, as some results of value
analysis may no longer be available at the PC where a `kill_loads`
function is invoked.
---
 backend/CSE.v       |  38 ++++-----
 backend/CSEdomain.v |  63 +++++++++++++--
 backend/CSEproof.v  | 187 +++++++++++++++++++++++---------------------
 3 files changed, 175 insertions(+), 113 deletions(-)

diff --git a/backend/CSE.v b/backend/CSE.v
index dc347f725..ee7c353d9 100644
--- a/backend/CSE.v
+++ b/backend/CSE.v
@@ -77,7 +77,7 @@ Fixpoint find_valnum_rhs (r: rhs) (eqs: list equation)
   match eqs with
   | nil => None
   | Eq v str r' :: eqs1 =>
-      if str && eq_rhs r r' then Some v else find_valnum_rhs r eqs1
+      if str && compat_rhs r r' then Some v else find_valnum_rhs r eqs1
   end.
 
 (** [find_valnum_rhs' rhs eqs] is similar, but also accepts equations
@@ -88,7 +88,7 @@ Fixpoint find_valnum_rhs' (r: rhs) (eqs: list equation)
   match eqs with
   | nil => None
   | Eq v str r' :: eqs1 =>
-      if eq_rhs r r' then Some v else find_valnum_rhs' r eqs1
+      if compat_rhs r r' then Some v else find_valnum_rhs' r eqs1
   end.
 
 (** [find_valnum_num vn eqs] searches the list of equations [eqs]
@@ -203,9 +203,9 @@ Definition add_op (n: numbering) (rd: reg) (op: operation) (rs: list reg) :=
 
 Definition add_load (n: numbering) (rd: reg)
                     (chunk: memory_chunk) (addr: addressing)
-                    (rs: list reg) :=
+                    (rs: list reg) (p: aptr) :=
   let (n1, vs) := valnum_regs n rs in
-  add_rhs n1 rd (Load chunk addr vs).
+  add_rhs n1 rd (Load chunk addr vs p).
 
 (** [set_unknown n rd] returns a numbering where [rd] is mapped to
   no value number, and no equations are added.  This is useful
@@ -246,7 +246,7 @@ Definition kill_equations (pred: rhs -> bool) (n: numbering) : numbering :=
 Definition filter_loads (r: rhs) : bool :=
   match r with
   | Op op _ => op_depends_on_memory op
-  | Load _ _ _ => true
+  | Load _ _ _ _ => true
   end.
 
 Definition kill_all_loads (n: numbering) : numbering :=
@@ -258,23 +258,19 @@ Definition kill_all_loads (n: numbering) : numbering :=
   from this store are preserved.  Equations involving memory-dependent
   operators are also removed. *)
 
-Definition filter_after_store (app: VA.t) (n: numbering) (p: aptr) (sz: Z) (r: rhs) :=
+Definition filter_after_store (n: numbering) (p: aptr) (sz: Z) (r: rhs) :=
   match r with
   | Op op vl =>
       op_depends_on_memory op
-  | Load chunk addr vl =>
-      match regs_valnums n vl with
-      | None => true
-      | Some rl =>
-          negb (pdisjoint (aaddressing app addr rl) (size_chunk chunk) p sz)
-      end
+  | Load chunk addr vl q =>
+      negb (pdisjoint q (size_chunk chunk) p sz)
   end.
 
 Definition kill_loads_after_store
              (app: VA.t) (n: numbering)
              (chunk: memory_chunk) (addr: addressing) (args: list reg) :=
   let p := aaddressing app addr args in
-  kill_equations (filter_after_store app n p (size_chunk chunk)) n.
+  kill_equations (filter_after_store n p (size_chunk chunk)) n.
 
 (** [add_store_result n chunk addr rargs rsrc] updates the numbering [n]
   to reflect the knowledge gained after executing an instruction
@@ -298,7 +294,7 @@ Definition add_store_result (app: VA.t) (n: numbering) (chunk: memory_chunk) (ad
     let (n1, vsrc) := valnum_reg n rsrc in
     let (n2, vargs) := valnum_regs n1 rargs in
     {| num_next := n2.(num_next);
-       num_eqs  := Eq vsrc false (Load chunk addr vargs) :: n2.(num_eqs);
+       num_eqs  := Eq vsrc false (Load chunk addr vargs (aaddressing app addr rargs)) :: n2.(num_eqs);
        num_reg  := n2.(num_reg);
        num_val  := n2.(num_val) |}
   else n.
@@ -310,8 +306,8 @@ Definition add_store_result (app: VA.t) (n: numbering) (chunk: memory_chunk) (ad
   operators are also removed. *)
 
 Definition kill_loads_after_storebytes
-             (app: VA.t) (n: numbering) (dst: aptr) (sz: Z) :=
-  kill_equations (filter_after_store app n dst sz) n.
+             (n: numbering) (dst: aptr) (sz: Z) :=
+  kill_equations (filter_after_store n dst sz) n.
 
 (** [add_memcpy app n1 n2 rsrc rdst sz] adds equations to [n2] that
   represent the effect of a [memcpy] block copy operation of [sz] bytes
@@ -327,7 +323,7 @@ Definition kill_loads_after_storebytes
 
 Definition shift_memcpy_eq (src sz delta: Z) (e: equation) :=
   match e with
-  | Eq l strict (Load chunk (Ainstack i) _) =>
+  | Eq l strict (Load chunk (Ainstack i) _ _) =>
       let i := Ptrofs.unsigned i in
       let j := i + delta in
       if zle src i
@@ -335,7 +331,7 @@ Definition shift_memcpy_eq (src sz delta: Z) (e: equation) :=
       && zeq (Z.modulo delta (align_chunk chunk)) 0
       && zle 0 j
       && zle j Ptrofs.max_unsigned
-      then Some(Eq l strict (Load chunk (Ainstack (Ptrofs.repr j)) nil))
+      then Some(Eq l strict (Load chunk (Ainstack (Ptrofs.repr j)) nil (Stk (Ptrofs.repr j))))
       else None
   | _ => None
   end.
@@ -461,7 +457,7 @@ Definition transfer (f: function) (approx: PMap.t VA.t) (pc: node) (before: numb
       | Iop op args res s =>
           add_op before res op args
       | Iload chunk addr args dst s =>
-          add_load before dst chunk addr args
+          add_load before dst chunk addr args (aaddressing approx!!pc addr args)
       | Istore chunk addr args src s =>
           let app := approx!!pc in
           let n := kill_loads_after_store app before chunk addr args in
@@ -487,7 +483,7 @@ Definition transfer (f: function) (approx: PMap.t VA.t) (pc: node) (before: numb
                   let app := approx!!pc in
                   let adst := aaddr_arg app dst in
                   let asrc := aaddr_arg app src in
-                  let n := kill_loads_after_storebytes app before adst sz in
+                  let n := kill_loads_after_storebytes before adst sz in
                   set_res_unknown (add_memcpy before n asrc adst sz) res
               | _ =>
                   empty_numbering
@@ -537,7 +533,7 @@ Definition transf_instr (n: numbering) (instr: instruction) :=
         end
   | Iload chunk addr args dst s =>
       let (n1, vl) := valnum_regs n args in
-      match find_rhs n1 (Load chunk addr vl) with
+      match find_rhs n1 (Load chunk addr vl Ptop) with
       | Some r =>
           Iop Omove (r :: nil) dst s
       | None =>
diff --git a/backend/CSEdomain.v b/backend/CSEdomain.v
index 9eb0bc80d..cd4dc39da 100644
--- a/backend/CSEdomain.v
+++ b/backend/CSEdomain.v
@@ -16,6 +16,7 @@
 Require Import Coqlib Maps.
 Require Import AST Integers Values Memory.
 Require Import Op Registers RTL.
+Require Import ValueDomain.
 
 (** Value numbers are represented by positive integers.  Equations are
   of the form [valnum = rhs] or [valnum >= rhs], where the right-hand
@@ -27,7 +28,7 @@ Definition valnum := positive.
 
 Inductive rhs : Type :=
   | Op: operation -> list valnum -> rhs
-  | Load: memory_chunk -> addressing -> list valnum -> rhs.
+  | Load: memory_chunk -> addressing -> list valnum -> aptr -> rhs.
 
 Inductive equation : Type :=
   | Eq (v: valnum) (strict: bool) (r: rhs).
@@ -38,10 +39,39 @@ Definition eq_list_valnum: forall (x y: list valnum), {x=y}+{x<>y} := list_eq_de
 
 Definition eq_rhs (x y: rhs) : {x=y}+{x<>y}.
 Proof.
-  generalize chunk_eq eq_operation eq_addressing eq_valnum eq_list_valnum.
+  generalize chunk_eq eq_operation eq_addressing eq_valnum eq_list_valnum eq_aptr.
   decide equality.
 Defined.
 
+(** Equality of [rhs] up to differences in regions attached to [Load] rhs. *)
+
+Inductive rhs_compat: rhs -> rhs -> Prop :=
+  | rhs_compat_op: forall op vl,
+      rhs_compat (Op op vl) (Op op vl)
+  | rhs_compat_load: forall chunk addr vl p1 p2,
+      rhs_compat (Load chunk addr vl p1) (Load chunk addr vl p2).
+
+Lemma rhs_compat_sym: forall rh1 rh2,
+  rhs_compat rh1 rh2 -> rhs_compat rh2 rh1.
+Proof.
+  destruct 1; constructor; auto.
+Qed.
+
+Definition compat_rhs (r1 r2: rhs) : bool :=
+  match r1, r2 with
+  | Op op1 vl1, Op op2 vl2 => eq_operation op1 op2 && eq_list_valnum vl1 vl2
+  | Load chunk1 addr1 vl1 p1, Load chunk2 addr2 vl2 p2 =>
+      chunk_eq chunk1 chunk2 && eq_addressing addr1 addr2 && eq_list_valnum vl1 vl2
+  | _, _ => false
+  end.
+
+Lemma compat_rhs_sound: forall r1 r2,
+  compat_rhs r1 r2 = true -> rhs_compat r1 r2.
+Proof.
+  unfold compat_rhs; intros; destruct r1, r2; try discriminate;
+  InvBooleans; subst; constructor.
+Qed.
+  
 (** A value numbering is a collection of equations between value numbers
   plus a partial map from registers to value numbers.  Additionally,
   we maintain the next unused value number, so as to easily generate
@@ -69,7 +99,7 @@ Definition empty_numbering :=
 Definition valnums_rhs (r: rhs): list valnum :=
   match r with
   | Op op vl => vl
-  | Load chunk addr vl => vl
+  | Load chunk addr vl ap => vl
   end.
 
 Definition wf_rhs (next: valnum) (r: rhs) : Prop :=
@@ -101,18 +131,41 @@ Inductive rhs_eval_to (valu: valuation) (ge: genv) (sp: val) (m: mem):
   | op_eval_to: forall op vl v,
       eval_operation ge sp op (map valu vl) m = Some v ->
       rhs_eval_to valu ge sp m (Op op vl) v
-  | load_eval_to: forall chunk addr vl a v,
+  | load_eval_to: forall chunk addr vl a v p,
       eval_addressing ge sp addr (map valu vl) = Some a ->
       Mem.loadv chunk m a = Some v ->
-      rhs_eval_to valu ge sp m (Load chunk addr vl) v.
+      rhs_eval_to valu ge sp m (Load chunk addr vl p) v.
+
+Lemma rhs_eval_to_compat: forall valu ge sp m rh v rh',
+  rhs_eval_to valu ge sp m rh v ->
+  rhs_compat rh rh' ->
+  rhs_eval_to valu ge sp m rh' v.
+Proof.
+  intros. inv H; inv H0; econstructor; eauto.
+Qed.
+
+(** A [Load] equation carries a region (abstract pointer) [p],
+    characterizing which part of memory is being read.
+    The following predicate makes sure the actual address
+    belongs to the given region. *)
+
+Inductive rhs_valid (valu: valuation) (ge: genv): val -> rhs -> Prop :=
+  | op_valid: forall sp op vl,
+      rhs_valid valu ge sp (Op op vl)
+  | load_valid: forall sp chunk addr vl p b ofs bc,
+      eval_addressing ge (Vptr sp Ptrofs.zero) addr (map valu vl) = Some (Vptr b ofs) ->
+      pmatch bc b ofs p -> genv_match bc ge -> bc sp = BCstack ->
+      rhs_valid valu ge (Vptr sp Ptrofs.zero) (Load chunk addr vl p).
 
 Inductive equation_holds (valu: valuation) (ge: genv) (sp: val) (m: mem):
                                                       equation -> Prop :=
   | eq_holds_strict: forall l r,
       rhs_eval_to valu ge sp m r (valu l) ->
+      rhs_valid valu ge sp r ->
       equation_holds valu ge sp m (Eq l true r)
   | eq_holds_lessdef: forall l r v,
       rhs_eval_to valu ge sp m r v -> Val.lessdef v (valu l) ->
+      rhs_valid valu ge sp r ->
       equation_holds valu ge sp m (Eq l false r).
 
 Record numbering_holds (valu: valuation) (ge: genv) (sp: val)
diff --git a/backend/CSEproof.v b/backend/CSEproof.v
index aece162c0..f4c76fffb 100644
--- a/backend/CSEproof.v
+++ b/backend/CSEproof.v
@@ -74,6 +74,17 @@ Proof.
 - econstructor; eauto. rewrite valnums_val_exten by assumption. auto.
 Qed.
 
+Lemma rhs_valid_exten:
+  forall r,
+  rhs_valid valu1 ge sp r ->
+  (forall v, In v (valnums_rhs r) -> Plt v upto) ->
+  rhs_valid valu2 ge sp r.
+Proof.
+  intros. inv H; simpl in *.
+- constructor.
+- econstructor; eauto. rewrite valnums_val_exten by assumption. auto.
+Qed.
+
 Lemma equation_holds_exten:
   forall e,
   equation_holds valu1 ge sp m e ->
@@ -82,7 +93,9 @@ Lemma equation_holds_exten:
 Proof.
   intros. destruct e. destruct H0. inv H.
 - constructor. rewrite AGREE by auto. apply rhs_eval_to_exten; auto.
+  apply rhs_valid_exten; auto.
 - econstructor. apply rhs_eval_to_exten; eauto. rewrite AGREE by auto. auto.
+  apply rhs_valid_exten; auto.
 Qed.
 
 Lemma numbering_holds_exten:
@@ -176,24 +189,26 @@ Qed.
 
 Lemma find_valnum_rhs_charact:
   forall rh v eqs,
-  find_valnum_rhs rh eqs = Some v -> In (Eq v true rh) eqs.
+    find_valnum_rhs rh eqs = Some v ->
+    exists rh', In (Eq v true rh') eqs /\ rhs_compat rh rh'.
 Proof.
   induction eqs; simpl; intros.
 - inv H.
-- destruct a. destruct (strict && eq_rhs rh r) eqn:T.
-  + InvBooleans. inv H. left; auto.
-  + right; eauto.
+- destruct a. destruct (strict && compat_rhs rh r) eqn:T.
+  + InvBooleans. inv H. exists r; auto using compat_rhs_sound.
+  + destruct IHeqs as (rh' & A & B); auto. exists rh'; auto.
 Qed.
 
 Lemma find_valnum_rhs'_charact:
   forall rh v eqs,
-  find_valnum_rhs' rh eqs = Some v -> exists strict, In (Eq v strict rh) eqs.
+    find_valnum_rhs' rh eqs = Some v ->
+    exists strict rh', In (Eq v strict rh') eqs /\ rhs_compat rh rh'.
 Proof.
   induction eqs; simpl; intros.
 - inv H.
-- destruct a. destruct (eq_rhs rh r) eqn:T.
-  + inv H. exists strict; auto.
-  + exploit IHeqs; eauto. intros [s IN]. exists s; auto.
+- destruct a. destruct (compat_rhs rh r) eqn:T.
+  + inv H. exists strict, r; auto using compat_rhs_sound.
+  + exploit IHeqs; eauto. intros (s & rh' & A & B). exists s, rh'; auto.
 Qed.
 
 Lemma find_valnum_num_charact:
@@ -238,22 +253,11 @@ Lemma find_rhs_sound:
   exists v, rhs_eval_to valu ge sp m rh v /\ Val.lessdef v rs#r.
 Proof.
   unfold find_rhs; intros. destruct (find_valnum_rhs' rh (num_eqs n)) as [vres|] eqn:E; try discriminate.
-  exploit find_valnum_rhs'_charact; eauto. intros [strict IN].
+  exploit find_valnum_rhs'_charact; eauto. intros (strict & rh' & IN & COMPAT).
   erewrite reg_valnum_sound by eauto.
   exploit num_holds_eq; eauto. intros EH. inv EH.
-- exists (valu vres); auto.
-- exists v; auto.
-Qed.
-
-Remark in_remove:
-  forall (A: Type) (eq: forall (x y: A), {x=y}+{x<>y}) x y l,
-  In y (List.remove eq x l) <-> x <> y /\ In y l.
-Proof.
-  induction l; simpl.
-  tauto.
-  destruct (eq x a).
-  subst a. rewrite IHl. tauto.
-  simpl. rewrite IHl. intuition congruence.
+- exists (valu vres); eauto using rhs_eval_to_compat, rhs_compat_sym.
+- exists v; eauto using rhs_eval_to_compat, rhs_compat_sym.
 Qed.
 
 Lemma forget_reg_charact:
@@ -264,7 +268,7 @@ Proof.
   unfold forget_reg; intros.
   destruct (PTree.get rd n.(num_reg)) as [vd|] eqn:GET.
 - rewrite PMap.gsspec in H0. destruct (peq v vd).
-  + subst v. rewrite in_remove in H0. intuition.
+  + subst v. apply List.in_remove in H0. tauto.
   + split; auto. exploit wf_num_val; eauto. congruence.
 - split; auto. exploit wf_num_val; eauto. congruence.
 Qed.
@@ -296,6 +300,7 @@ Qed.
 Lemma add_rhs_holds:
   forall valu1 ge sp rs m n rd rh rs',
   numbering_holds valu1 ge sp rs m n ->
+  rhs_valid valu1 ge sp rh ->
   rhs_eval_to valu1 ge sp m rh (rs'#rd) ->
   wf_rhs n.(num_next) rh ->
   (forall r, r <> rd -> rs'#r = rs#r) ->
@@ -305,10 +310,11 @@ Proof.
   destruct (find_valnum_rhs rh n.(num_eqs)) as [vres|] eqn:FIND.
 
 - (* A value number exists already *)
-  exploit find_valnum_rhs_charact; eauto. intros IN.
+  exploit find_valnum_rhs_charact; eauto. intros (rh' & IN & COMPAT).
   exploit wf_num_eqs; eauto with cse. intros [A B].
   exploit num_holds_eq; eauto. intros EH. inv EH.
-  assert (rs'#rd = valu1 vres) by (eapply rhs_eval_to_inj; eauto).
+  assert (rs'#rd = valu1 vres) by
+    (eapply rhs_eval_to_inj; eauto using rhs_eval_to_compat, rhs_compat_sym).
   exists valu1; constructor; simpl; intros.
 + constructor; simpl; intros.
   * eauto with cse.
@@ -319,7 +325,7 @@ Proof.
 + eauto with cse.
 + rewrite PTree.gsspec in H5. destruct (peq r rd).
   congruence.
-  rewrite H2 by auto. eauto with cse.
+  rewrite H3 by auto. eauto with cse.
 
 - (* Assigning a new value number *)
   set (valu2 := fun v => if peq v n.(num_next) then rs'#rd else valu1 v).
@@ -327,20 +333,21 @@ Proof.
   { red; intros. unfold valu2. apply peq_false. apply Plt_ne; auto. }
   exists valu2; constructor; simpl; intros.
 + constructor; simpl; intros.
-  * destruct H3. inv H3. simpl; split. extlia.
+  * destruct H4. inv H4. simpl; split. extlia.
     red; intros. apply Plt_trans_succ; eauto.
     apply wf_equation_incr with (num_next n). eauto with cse. extlia.
-  * rewrite PTree.gsspec in H3. destruct (peq r rd).
-    inv H3. extlia.
+  * rewrite PTree.gsspec in H4. destruct (peq r rd).
+    inv H4. extlia.
     apply Plt_trans_succ; eauto with cse.
   * apply update_reg_charact; eauto with cse.
-+ destruct H3. inv H3.
++ destruct H4. inv H4.
   constructor. unfold valu2 at 2; rewrite peq_true.
   eapply rhs_eval_to_exten; eauto.
+  eapply rhs_valid_exten; eauto.
   eapply equation_holds_exten; eauto with cse.
-+ rewrite PTree.gsspec in H3. unfold valu2. destruct (peq r rd).
-  inv H3. rewrite peq_true; auto.
-  rewrite peq_false. rewrite H2 by auto. eauto with cse.
++ rewrite PTree.gsspec in H4. unfold valu2. destruct (peq r rd).
+  inv H4. rewrite peq_true; auto.
+  rewrite peq_false. rewrite H3 by auto. eauto with cse.
   apply Plt_ne; eauto with cse.
 Qed.
 
@@ -373,22 +380,25 @@ Proof.
   exploit valnum_regs_holds; eauto.
   intros (valu2 & A & B & C & D & E).
   eapply add_rhs_holds; eauto.
++ constructor.
 + constructor. rewrite Regmap.gss. congruence.
 + intros. apply Regmap.gso; auto.
 Qed.
 
 Lemma add_load_holds:
-  forall valu1 ge sp rs m n addr (args: list reg) a chunk v dst,
-  numbering_holds valu1 ge sp rs m n ->
-  eval_addressing ge sp addr rs##args = Some a ->
-  Mem.loadv chunk m a = Some v ->
-  exists valu2, numbering_holds valu2 ge sp (rs#dst <- v) m (add_load n dst chunk addr args).
+  forall valu1 ge sp rs m n addr (args: list reg) ap b ofs chunk v dst bc,
+  numbering_holds valu1 ge (Vptr sp Ptrofs.zero) rs m n ->
+  eval_addressing ge (Vptr sp Ptrofs.zero) addr rs##args = Some (Vptr b ofs) ->
+  Mem.loadv chunk m (Vptr b ofs) = Some v ->
+  pmatch bc b ofs ap -> genv_match bc ge -> bc sp = BCstack ->
+  exists valu2, numbering_holds valu2 ge (Vptr sp Ptrofs.zero) (rs#dst <- v) m (add_load n dst chunk addr args ap).
 Proof.
   unfold add_load; intros.
   destruct (valnum_regs n args) as [n1 vl] eqn:VN.
   exploit valnum_regs_holds; eauto.
   intros (valu2 & A & B & C & D & E).
   eapply add_rhs_holds; eauto.
++ econstructor; eauto. rewrite <- B; auto. 
 + econstructor. rewrite <- B; eauto. rewrite Regmap.gss; auto.
 + intros. apply Regmap.gso; auto.
 Qed.
@@ -437,6 +447,7 @@ Lemma kill_equations_hold:
   numbering_holds valu ge sp rs m n ->
   (forall r v,
       pred r = false ->
+      rhs_valid valu ge sp r ->
       rhs_eval_to valu ge sp m r v ->
       rhs_eval_to valu ge sp m' r v) ->
   numbering_holds valu ge sp rs m' (kill_equations pred n).
@@ -455,8 +466,8 @@ Lemma kill_all_loads_hold:
   numbering_holds valu ge sp rs m' (kill_all_loads n).
 Proof.
   intros. eapply kill_equations_hold; eauto.
-  unfold filter_loads; intros. inv H1.
-  constructor. rewrite <- H2. apply op_depends_on_memory_correct; auto.
+  unfold filter_loads; intros. inv H2.
+  constructor. rewrite <- H3. apply op_depends_on_memory_correct; auto.
   discriminate.
 Qed.
 
@@ -473,19 +484,18 @@ Lemma kill_loads_after_store_holds:
                            (kill_loads_after_store approx n chunk addr args).
 Proof.
   intros. apply kill_equations_hold with m; auto.
-  intros. unfold filter_after_store in H6; inv H7.
-- constructor. rewrite <- H8. apply op_depends_on_memory_correct; auto.
-- destruct (regs_valnums n vl) as [rl|] eqn:RV; try discriminate.
-  econstructor; eauto. rewrite <- H9.
+  intros. unfold filter_after_store in H6; inv H8.
+- constructor. rewrite <- H9. apply op_depends_on_memory_correct; auto.
+- econstructor; eauto. rewrite <- H10.
   destruct a; simpl in H1; try discriminate.
-  destruct a0; simpl in H9; try discriminate.
+  destruct a0; simpl in H10; try discriminate.
   simpl.
   rewrite negb_false_iff in H6. unfold aaddressing in H6.
+  inv H7. rewrite H9 in H14; inv H14.
   eapply Mem.load_store_other. eauto.
-  eapply pdisjoint_sound. eauto.
+  eapply pdisjoint_sound_strong with (bc1 := bc0) (bc2 := bc); eauto.
   apply match_aptr_of_aval. eapply eval_static_addressing_sound; eauto.
-  erewrite <- regs_valnums_sound by eauto. eauto with va.
-  apply match_aptr_of_aval. eapply eval_static_addressing_sound; eauto with va.
+  eauto with va.
 Qed.
 
 Lemma store_normalized_range_sound:
@@ -507,15 +517,17 @@ Qed.
 
 Lemma add_store_result_hold:
   forall valu1 ge sp rs m' n addr args a chunk m src bc ae approx am,
-  numbering_holds valu1 ge sp rs m' n ->
-  eval_addressing ge sp addr rs##args = Some a ->
+  numbering_holds valu1 ge (Vptr sp Ptrofs.zero) rs m' n ->
+  eval_addressing ge (Vptr sp Ptrofs.zero) addr rs##args = Some a ->
   Mem.storev chunk m a rs#src = Some m' ->
+  genv_match bc ge ->
+  bc sp = BCstack ->
   ematch bc rs ae ->
   approx = VA.State ae am ->
-  exists valu2, numbering_holds valu2 ge sp rs m' (add_store_result approx n chunk addr args src).
+  exists valu2, numbering_holds valu2 ge (Vptr sp Ptrofs.zero) rs m' (add_store_result approx n chunk addr args src).
 Proof.
   unfold add_store_result; intros.
-  unfold avalue; rewrite H3.
+  unfold avalue; rewrite H5.
   destruct (vincl (AE.get src ae) (store_normalized_range chunk)) eqn:INCL.
 - destruct (valnum_reg n src) as [n1 vsrc] eqn:VR1.
   destruct (valnum_regs n1 args) as [n2 vargs] eqn:VR2.
@@ -523,46 +535,40 @@ Proof.
   exploit valnum_regs_holds; eauto. intros (valu3 & P & Q & R & S & T).
   exists valu3. constructor; simpl; intros.
 + constructor; simpl; intros; eauto with cse.
-  destruct H4; eauto with cse. subst e. split.
+  destruct H6; eauto with cse. subst e. split.
   eapply Pos.lt_le_trans; eauto.
   red; simpl; intros. auto.
-+ destruct H4; eauto with cse. subst eq. apply eq_holds_lessdef with (Val.load_result chunk rs#src).
-  apply load_eval_to with a. rewrite <- Q; auto.
-  destruct a; try discriminate. simpl. eapply Mem.load_store_same; eauto.
-  rewrite B. rewrite R by auto. apply store_normalized_range_sound with bc.
-  rewrite <- B. eapply vmatch_ge. apply vincl_ge; eauto. apply H2.
++ destruct H6; eauto with cse. subst eq. apply eq_holds_lessdef with (Val.load_result chunk rs#src).
+  * apply load_eval_to with a. rewrite <- Q; auto.
+    destruct a; try discriminate. simpl. eapply Mem.load_store_same; eauto.
+  * rewrite B. rewrite R by auto. apply store_normalized_range_sound with bc.
+    rewrite <- B. eapply vmatch_ge. apply vincl_ge; eauto. apply H4.
+  * destruct a; try discriminate. econstructor; eauto. rewrite <- Q. eassumption.
+    apply match_aptr_of_aval. eapply eval_static_addressing_sound; eauto with va.
 + eauto with cse.
-
 - exists valu1; auto.
 Qed.
 
 Lemma kill_loads_after_storebytes_holds:
-  forall valu ge sp rs m n dst b ofs bytes m' bc approx ae am sz,
+  forall valu ge sp rs m n dst b ofs bytes m' bc sz,
   numbering_holds valu ge (Vptr sp Ptrofs.zero) rs m n ->
   pmatch bc b ofs dst ->
   Mem.storebytes m b (Ptrofs.unsigned ofs) bytes = Some m' ->
   genv_match bc ge ->
   bc sp = BCstack ->
-  ematch bc rs ae ->
-  approx = VA.State ae am ->
   length bytes = Z.to_nat sz -> sz >= 0 ->
   numbering_holds valu ge (Vptr sp Ptrofs.zero) rs m'
-                           (kill_loads_after_storebytes approx n dst sz).
+                           (kill_loads_after_storebytes n dst sz).
 Proof.
   intros. apply kill_equations_hold with m; auto.
-  intros. unfold filter_after_store in H8; inv H9.
-- constructor. rewrite <- H10. apply op_depends_on_memory_correct; auto.
-- destruct (regs_valnums n vl) as [rl|] eqn:RV; try discriminate.
-  econstructor; eauto. rewrite <- H11.
-  destruct a; simpl in H10; try discriminate.
-  simpl.
-  rewrite negb_false_iff in H8.
+  intros. unfold filter_after_store in H6; inv H8.
+- constructor. rewrite <- H9. apply op_depends_on_memory_correct; auto.
+- inv H7. rewrite H9 in H15; inv H15.
+  econstructor; eauto. rewrite <- H10. simpl.
+  rewrite negb_false_iff in H6.
   eapply Mem.load_storebytes_other. eauto.
-  rewrite H6. rewrite Z2Nat.id by lia.
-  eapply pdisjoint_sound. eauto.
-  unfold aaddressing. apply match_aptr_of_aval. eapply eval_static_addressing_sound; eauto.
-  erewrite <- regs_valnums_sound by eauto. eauto with va.
-  auto.
+  rewrite H4. rewrite Z2Nat.id by lia.
+  eapply pdisjoint_sound_strong with (bc1 := bc0) (bc2 := bc); eauto.
 Qed.
 
 Lemma load_memcpy:
@@ -660,17 +666,20 @@ Proof with (try discriminate).
     unfold j, delta. eapply load_memcpy; eauto.
     apply Zmod_divide; auto. generalize (align_chunk_pos chunk); lia.
   }
+  rename a into ap.
+  assert (eval_addressing ge (Vptr sp Ptrofs.zero) (Ainstack (Ptrofs.repr j)) nil = Some (Vptr sp (Ptrofs.repr j))).
+  { rewrite eval_addressing_Ainstack. simpl. rewrite Ptrofs.add_zero_l. auto. }
   inv H2.
-+ inv H3. exploit eval_addressing_Ainstack_inv; eauto. intros [E1 E2].
++ inv H5. exploit eval_addressing_Ainstack_inv; eauto. intros [E1 E2].
   simpl in E2; rewrite Ptrofs.add_zero_l in E2. subst a.
-  apply eq_holds_strict. econstructor. rewrite eval_addressing_Ainstack.
-  simpl. rewrite Ptrofs.add_zero_l. eauto.
-  apply LD; auto.
-+ inv H4. exploit eval_addressing_Ainstack_inv; eauto. intros [E1 E2].
+  apply eq_holds_strict.
+  * econstructor; eauto.
+  * inv H7. econstructor; eauto with va.
++ inv H6. exploit eval_addressing_Ainstack_inv; eauto. intros [E1 E2].
   simpl in E2; rewrite Ptrofs.add_zero_l in E2. subst a.
   apply eq_holds_lessdef with v; auto.
-  econstructor. rewrite eval_addressing_Ainstack. simpl. rewrite Ptrofs.add_zero_l. eauto.
-  apply LD; auto.
+  * econstructor; eauto.
+  * inv H8. econstructor; eauto with va.
 Qed.
 
 Lemma add_memcpy_eqs_charact:
@@ -810,13 +819,11 @@ Proof.
   destruct (f (fun v : valnum => find_valnum_num v (num_eqs n)) op args)
            as [[op1 args1] | ] eqn:?; try discriminate.
   assert (exists r': val, sem op1 (map valu args1) = Some r' /\ Val.lessdef r r').
-  { exploit f_sound.
+  { exploit f_sound; eauto.
     simpl; intros.
     exploit num_holds_eq; eauto.
     eapply find_valnum_num_charact; eauto with cse.
-    eapply H1.
-    intros EH; inv EH; auto. eexact H3. eexact Heqo. eexact H0.
-    intros. eauto.
+    intros EH; inv EH; auto.
   }
   destruct (reduce_rec A f n niter op1 args1) as [[op2 rl2] | ] eqn:?.
   destruct H1. destruct H1.
@@ -1102,7 +1109,7 @@ Proof.
   destruct (valnum_regs approx!!pc args) as [n1 vl] eqn:?.
   destruct SAT as [valu1 NH1].
   exploit valnum_regs_holds; eauto. intros (valu2 & NH2 & EQ & AG & P & Q).
-  destruct (find_rhs n1 (Load chunk addr vl)) as [r|] eqn:?.
+  destruct (find_rhs n1 (Load chunk addr vl Ptop)) as [r|] eqn:?.
 + (* replaced by move *)
   exploit find_rhs_sound; eauto. intros (v' & EV & LD).
   assert (v' = v) by (inv EV; congruence). subst v'.
@@ -1111,7 +1118,10 @@ Proof.
   econstructor; eauto.
   eapply analysis_correct_1; eauto. simpl; auto.
   unfold transfer; rewrite H.
+  destruct a; try discriminate. InvSoundState.
   eapply add_load_holds; eauto.
+  unfold aaddressing, vanalyze. rewrite AN. apply match_aptr_of_aval.
+  eapply eval_static_addressing_sound; eauto with va.
   apply set_reg_lessdef; auto. eapply Val.lessdef_trans; eauto.
 + (* load is preserved, but addressing is possibly simplified *)
   destruct (reduce addressing combine_addr n1 addr args vl) as [addr' args'] eqn:?.
@@ -1129,7 +1139,10 @@ Proof.
   econstructor; eauto.
   eapply analysis_correct_1; eauto. simpl; auto.
   unfold transfer; rewrite H.
+  destruct a; try discriminate. InvSoundState.
   eapply add_load_holds; eauto.
+  unfold aaddressing, vanalyze. rewrite AN. apply match_aptr_of_aval.
+  eapply eval_static_addressing_sound; eauto with va.
   apply set_reg_lessdef; auto.
 
 - (* Istore *)