From 8345b783993a61ba0e2865e11c51538ebf4d6237 Mon Sep 17 00:00:00 2001
From: Keno Fischer <keno@alumni.harvard.edu>
Date: Sun, 4 Feb 2018 22:28:25 -0500
Subject: [PATCH 1/2] Begin work on new optimizer framework

This introduces a new framework for middle-end optimizations in julia, based on
SSA form IR. Much work is still left to be done, but thanks to Jameson's work
this is in a bootstrappable form, so we're merging it behind a feature flag
to allow for quick iteration.

Documentation of the new IR format is available in the devdocs.

Co-authored-by: Jameson Nash <vtjnash@gmail.com>
---
 base/array.jl                   |  10 +
 base/boot.jl                    |  14 +-
 base/checked.jl                 |   2 +-
 base/compiler/bootstrap.jl      |   2 -
 base/compiler/compiler.jl       |  26 ++
 base/compiler/optimize.jl       | 136 ++++++--
 base/compiler/ssair/domtree.jl  |  97 ++++++
 base/compiler/ssair/driver.jl   |  75 +++++
 base/compiler/ssair/ir.jl       | 574 ++++++++++++++++++++++++++++++++
 base/compiler/ssair/legacy.jl   | 143 ++++++++
 base/compiler/ssair/passes.jl   |  84 +++++
 base/compiler/ssair/queries.jl  |   9 +
 base/compiler/ssair/show.jl     | 138 ++++++++
 base/compiler/ssair/slot2ssa.jl | 444 ++++++++++++++++++++++++
 base/compiler/ssair/verify.jl   |  52 +++
 base/compiler/typelattice.jl    |  16 +
 base/compiler/utilities.jl      |  36 +-
 base/compiler/validation.jl     |   5 +-
 base/iterators.jl               |  33 +-
 base/ordering.jl                |   9 +-
 base/sort.jl                    |  20 +-
 base/stream.jl                  |   8 +-
 base/subarray.jl                | 213 ------------
 base/sysimg.jl                  |   1 +
 base/views.jl                   | 212 ++++++++++++
 doc/src/devdocs/ssair.md        | 100 ++++++
 src/ast.c                       |   2 +
 src/builtins.c                  |   2 +
 src/cgutils.cpp                 |  48 ++-
 src/codegen.cpp                 | 406 ++++++++++++++++++----
 src/dump.c                      |  13 +-
 src/gf.c                        |   5 +
 src/interpreter-stacktrace.c    |   2 +-
 src/interpreter.c               |  40 ++-
 src/intrinsics.cpp              |   3 +
 src/jltypes.c                   |  14 +
 src/julia.h                     |   4 +
 src/julia_internal.h            |   1 +
 src/rtutils.c                   |  12 +-
 src/staticdata.c                |   2 +
 test/core.jl                    |  14 +-
 test/inline.jl                  |   2 +-
 42 files changed, 2662 insertions(+), 367 deletions(-)
 create mode 100644 base/compiler/ssair/domtree.jl
 create mode 100644 base/compiler/ssair/driver.jl
 create mode 100644 base/compiler/ssair/ir.jl
 create mode 100644 base/compiler/ssair/legacy.jl
 create mode 100644 base/compiler/ssair/passes.jl
 create mode 100644 base/compiler/ssair/queries.jl
 create mode 100644 base/compiler/ssair/show.jl
 create mode 100644 base/compiler/ssair/slot2ssa.jl
 create mode 100644 base/compiler/ssair/verify.jl
 create mode 100644 base/views.jl
 create mode 100644 doc/src/devdocs/ssair.md

diff --git a/base/array.jl b/base/array.jl
index e5c191667d2a4..668beb348ddfd 100644
--- a/base/array.jl
+++ b/base/array.jl
@@ -214,6 +214,16 @@ end
 
 copyto!(dest::Array{T}, src::Array{T}) where {T} = copyto!(dest, 1, src, 1, length(src))
 
+# N.B.: The generic definition in multidimensional.jl covers this; this is just here
+# for bootstrapping purposes.
+function fill!(dest::Array{T}, x) where T
+    xT = convert(T, x)
+    for i in 1:length(dest)
+        @inbounds dest[i] = xT
+    end
+    dest
+end
+
 """
     copy(x)
 
diff --git a/base/boot.jl b/base/boot.jl
index 75070ddf496d7..996f21addf9ae 100644
--- a/base/boot.jl
+++ b/base/boot.jl
@@ -98,6 +98,16 @@
 #    label::Int
 #end
 
+#struct PiNode
+#    val
+#    typ
+#end
+
+#struct PhiNode
+#    edges::Vector{Any}
+#    values::Vector{Any}
+#end
+
 #struct QuoteNode
 #    value
 #end
@@ -141,7 +151,7 @@ export
     TypeError, ArgumentError, MethodError, AssertionError, LoadError, InitError,
     UndefKeywordError,
     # AST representation
-    Expr, QuoteNode, LineNumberNode, GlobalRef,
+    Expr, QuoteNode, LineNumberNode, GlobalRef, PiNode, PhiNode,
     # object model functions
     fieldtype, getfield, setfield!, nfields, throw, tuple, ===, isdefined, eval,
     # sizeof    # not exported, to avoid conflicting with Base.sizeof
@@ -344,6 +354,8 @@ eval(Core, :(LineNumberNode(l::Int, @nospecialize(f)) = $(Expr(:new, :LineNumber
 eval(Core, :(GlobalRef(m::Module, s::Symbol) = $(Expr(:new, :GlobalRef, :m, :s))))
 eval(Core, :(SlotNumber(n::Int) = $(Expr(:new, :SlotNumber, :n))))
 eval(Core, :(TypedSlot(n::Int, @nospecialize(t)) = $(Expr(:new, :TypedSlot, :n, :t))))
+eval(Core, :(PhiNode(edges::Array{Any, 1}, values::Array{Any, 1}) = $(Expr(:new, :PhiNode, :edges, :values))))
+eval(Core, :(PiNode(val, typ) = $(Expr(:new, :PiNode, :val, :typ))))
 
 Module(name::Symbol=:anonymous, std_imports::Bool=true) = ccall(:jl_f_new_module, Ref{Module}, (Any, Bool), name, std_imports)
 
diff --git a/base/checked.jl b/base/checked.jl
index 61284a8620e54..794ce295e0c60 100644
--- a/base/checked.jl
+++ b/base/checked.jl
@@ -13,7 +13,7 @@ import Core.Intrinsics:
        checked_srem_int,
        checked_uadd_int, checked_usub_int, checked_umul_int, checked_udiv_int,
        checked_urem_int
-import Base: no_op_err, @_inline_meta, @_noinline_meta
+import ..no_op_err, ..@_inline_meta, ..@_noinline_meta
 
 # define promotion behavior for checked operations
 checked_add(x::Integer, y::Integer) = checked_add(promote(x,y)...)
diff --git a/base/compiler/bootstrap.jl b/base/compiler/bootstrap.jl
index 1a19429c8155a..66e2ea8c2ade8 100644
--- a/base/compiler/bootstrap.jl
+++ b/base/compiler/bootstrap.jl
@@ -31,5 +31,3 @@ let fs = Any[typeinf_ext, typeinf, typeinf_edge, pure_eval_call],
         end
     end
 end
-
-ccall(:jl_set_typeinf_func, Cvoid, (Any,), typeinf_ext)
diff --git a/base/compiler/compiler.jl b/base/compiler/compiler.jl
index d25dae6884d01..6ed61657292b0 100644
--- a/base/compiler/compiler.jl
+++ b/base/compiler/compiler.jl
@@ -50,6 +50,14 @@ include("refvalue.jl")
 # checked arithmetic
 const checked_add = +
 const checked_sub = -
+const SignedInt = Union{Int8,Int16,Int32,Int64,Int128}
+const UnsignedInt = Union{UInt8,UInt16,UInt32,UInt64,UInt128}
+sub_with_overflow(x::T, y::T) where {T<:SignedInt}   = checked_ssub_int(x, y)
+sub_with_overflow(x::T, y::T) where {T<:UnsignedInt} = checked_usub_int(x, y)
+sub_with_overflow(x::Bool, y::Bool) = (x-y, false)
+add_with_overflow(x::T, y::T) where {T<:SignedInt}   = checked_sadd_int(x, y)
+add_with_overflow(x::T, y::T) where {T<:UnsignedInt} = checked_uadd_int(x, y)
+add_with_overflow(x::Bool, y::Bool) = (x+y, false)
 
 # core array operations
 include("indices.jl")
@@ -66,12 +74,29 @@ include("reduce.jl")
 include("bitarray.jl")
 include("bitset.jl")
 include("abstractdict.jl")
+include("abstractset.jl")
 include("iterators.jl")
+using .Iterators: zip, enumerate
+using .Iterators: Flatten, product  # for generators
 include("namedtuple.jl")
 
 # core docsystem
 include("docs/core.jl")
 
+# SubArray
+include("subarray.jl")
+macro views(x); esc(x); end
+
+# sorting
+function sort end
+function sort! end
+function issorted end
+function sortperm end
+include("ordering.jl")
+using .Order
+include("sort.jl")
+using .Sort
+
 ############
 # compiler #
 ############
@@ -96,6 +121,7 @@ include("compiler/typeinfer.jl")
 include("compiler/optimize.jl") # TODO: break this up further + extract utilities
 
 include("compiler/bootstrap.jl")
+ccall(:jl_set_typeinf_func, Cvoid, (Any,), typeinf_ext)
 
 end # baremodule Compiler
 ))
diff --git a/base/compiler/optimize.jl b/base/compiler/optimize.jl
index 75c0ff16b327b..77187344b106e 100644
--- a/base/compiler/optimize.jl
+++ b/base/compiler/optimize.jl
@@ -1,5 +1,7 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
+include("compiler/ssair/driver.jl")
+
 #####################
 # OptimizationState #
 #####################
@@ -21,7 +23,7 @@ mutable struct OptimizationState
             s_edges = []
             frame.stmt_edges[1] = s_edges
         end
-        next_label = label_counter(frame.src.code) + 1
+        next_label = max(label_counter(frame.src.code), length(frame.src.code)) + 10
         return new(frame.linfo, frame.vararg_type_container,
                    s_edges::Vector{Any},
                    frame.src, frame.mod, frame.nargs,
@@ -51,7 +53,7 @@ mutable struct OptimizationState
             inmodule = linfo.def::Module
             nargs = 0
         end
-        next_label = label_counter(src.code) + 1
+        next_label = max(label_counter(src.code), length(src.code)) + 10
         vararg_type_container = nothing # if you want something more accurate, set it yourself :P
         return new(linfo, vararg_type_container,
                    s_edges::Vector{Any},
@@ -260,6 +262,8 @@ function isinlineable(m::Method, src::CodeInfo, mod::Module, params::Params, bon
     return inlineable
 end
 
+const enable_new_optimizer = RefValue{Bool}(false)
+
 # converge the optimization work
 function optimize(me::InferenceState)
     # annotate fulltree with type information
@@ -267,6 +271,7 @@ function optimize(me::InferenceState)
 
     # run optimization passes on fulltree
     force_noinline = true
+    def = me.linfo.def
     if me.limited && me.cached && me.parent !== nothing
         # a top parent will be cached still, but not this intermediate work
         me.cached = false
@@ -280,27 +285,48 @@ function optimize(me::InferenceState)
         # optimizing and use unoptimized IR in codegen.
         gotoifnot_elim_pass!(opt)
         inlining_pass!(opt, opt.src.propagate_inbounds)
-        # Clean up after inlining
-        gotoifnot_elim_pass!(opt)
-        basic_dce_pass!(opt)
-        void_use_elim_pass!(opt)
-        copy_duplicated_expr_pass!(opt)
-        split_undef_flag_pass!(opt)
-        fold_constant_getfield_pass!(opt)
-        # Compute escape information
-        # and elide unnecessary allocations
-        alloc_elim_pass!(opt)
-        # Clean up for `alloc_elim_pass!`
-        gotoifnot_elim_pass!(opt)
-        basic_dce_pass!(opt)
-        void_use_elim_pass!(opt)
+        any_enter = any_phi = false
+        if enable_new_optimizer[]
+            any_enter = any(x->isa(x, Expr) && x.head == :enter, opt.src.code)
+            any_phi = any(x->isa(x, PhiNode) || (isa(x, Expr) && x.head == :(=) && isa(x.args[2], PhiNode)), opt.src.code)
+        end
+        if enable_new_optimizer[] && !any_enter && isa(def, Method)
+            reindex_labels!(opt)
+            nargs = Int(opt.nargs) - 1
+            if def isa Method
+                topline = LineNumberNode(Int(def.line), def.file)
+            else
+                topline = LineNumberNode(0)
+            end
+            ir = run_passes(opt.src, opt.mod, nargs, topline)
+            replace_code!(opt.src, ir, nargs, topline)
+            push!(opt.src.code, LabelNode(length(opt.src.code) + 1))
+            any_phi = true
+        elseif !any_phi
+            # Clean up after inlining
+            gotoifnot_elim_pass!(opt)
+            basic_dce_pass!(opt)
+            void_use_elim_pass!(opt)
+            if !enable_new_optimizer[]
+                copy_duplicated_expr_pass!(opt)
+                split_undef_flag_pass!(opt)
+                fold_constant_getfield_pass!(opt)
+                # Compute escape information
+                # and elide unnecessary allocations
+                alloc_elim_pass!(opt)
+                # Clean up for `alloc_elim_pass!`
+                gotoifnot_elim_pass!(opt)
+                basic_dce_pass!(opt)
+                void_use_elim_pass!(opt)
+            end
+        end
         # Pop metadata before label reindexing
         let code = opt.src.code::Array{Any,1}
             meta_elim_pass!(code, coverage_enabled())
             filter!(x -> x !== nothing, code)
             force_noinline = peekmeta(code, :noinline)[1]
+            reindex_labels!(opt)
         end
-        reindex_labels!(opt)
         me.min_valid = opt.min_valid
         me.max_valid = opt.max_valid
     end
@@ -368,7 +394,6 @@ function optimize(me::InferenceState)
             force_noinline = true
         end
     end
-    def = me.linfo.def
     if force_noinline
         me.src.inlineable = false
     elseif !me.src.inlineable && isa(def, Method)
@@ -575,6 +600,7 @@ function type_annotate!(sv::InferenceState)
     undefs = fill(false, nslots)
     body = src.code::Array{Any,1}
     nexpr = length(body)
+    push!(body, LabelNode(nexpr + 1)) # add a terminal label for tracking phi
     i = 1
     while i <= nexpr
         st_i = states[i]
@@ -649,6 +675,8 @@ function _widen_all_consts!(e::Expr, untypedload::Vector{Bool}, slottypes::Vecto
                 end
                 e.args[i] = x
             end
+        elseif isa(x, PiNode)
+            e.args[i] = PiNode(x.val, widenconst(x.typ))
         elseif isa(x, SlotNumber) && (i != 1 || e.head !== :(=))
             untypedload[x.id] = true
         end
@@ -722,6 +750,18 @@ function substitute!(
     if isa(e, NewvarNode)
         return NewvarNode(substitute!(e.slot, na, argexprs, spsig, spvals, offset, boundscheck))
     end
+    if isa(e, PhiNode)
+        values = Vector{Any}(uninitialized, length(e.values))
+        for i = 1:length(values)
+            isassigned(e.values, i) || continue
+            values[i] = substitute!(e.values[i], na, argexprs, spsig,
+                spvals, offset, boundscheck)
+        end
+        return PhiNode(e.edges, values)
+    end
+    if isa(e, PiNode)
+        return PiNode(substitute!(e.val, na, argexprs, spsig, spvals, offset, boundscheck), e.typ)
+    end
     if isa(e, Expr)
         e = e::Expr
         head = e.head
@@ -797,7 +837,7 @@ function is_pure_builtin(@nospecialize(f))
     end
 end
 
-function statement_effect_free(@nospecialize(e), src::CodeInfo, mod::Module)
+function statement_effect_free(@nospecialize(e), src, mod::Module)
     if isa(e, Expr)
         if e.head === :(=)
             return !isa(e.args[1], GlobalRef) && effect_free(e.args[2], src, mod, false)
@@ -813,7 +853,7 @@ end
 # detect some important side-effect-free calls (allow_volatile=true)
 # and some affect-free calls (allow_volatile=false) -- affect_free means the call
 # cannot be affected by previous calls, except assignment nodes
-function effect_free(@nospecialize(e), src::CodeInfo, mod::Module, allow_volatile::Bool)
+function effect_free(@nospecialize(e), src, mod::Module, allow_volatile::Bool)
     if isa(e, GlobalRef)
         return (isdefined(e.mod, e.name) && (allow_volatile || isconst(e.mod, e.name)))
     elseif isa(e, Symbol)
@@ -1422,16 +1462,19 @@ function inlineable(@nospecialize(f), @nospecialize(ft), e::Expr, atypes::Vector
 
     # make labels / goto statements unique
     # relocate inlining information
-    newlabels = zeros(Int, label_counter(body.args))
-    for i = 1:length(body.args)
+    body_len = length(body.args)
+    newlabels = zeros(Int, body_len + 1)
+    for i = 1:body_len
         a = body.args[i]
         if isa(a, LabelNode)
+            @assert a.label == i
             newlabel = genlabel(sv)
             newlabels[a.label] = newlabel.label
             body.args[i] = newlabel
         end
     end
-    for i = 1:length(body.args)
+    local end_label # if it ends in a goto, we might need to add a come-from label
+    for i = 1:body_len
         a = body.args[i]
         if isa(a, GotoNode)
             body.args[i] = GotoNode(newlabels[a.label])
@@ -1440,6 +1483,19 @@ function inlineable(@nospecialize(f), @nospecialize(ft), e::Expr, atypes::Vector
                 a.args[1] = newlabels[a.args[1]::Int]
             elseif a.head === :gotoifnot
                 a.args[2] = newlabels[a.args[2]::Int]
+            elseif a.head === :(=) && isa(a.args[2], PhiNode)
+                edges = a.args[2].edges
+                if !@isdefined end_label
+                    for edge in edges
+                        if edge == body_len
+                            end_label = genlabel(sv)
+                            newlabels[body_len + 1] = end_label.label
+                            break
+                        end
+                    end
+                end
+                edges = Any[newlabels[edge::Int + 1] - 1 for edge in edges]
+                a.args[2] = PhiNode(edges, a.args[2].values)
             end
         end
     end
@@ -1449,7 +1505,14 @@ function inlineable(@nospecialize(f), @nospecialize(ft), e::Expr, atypes::Vector
     local retval
     multiret = false
     lastexpr = pop!(body.args)
-    if isa(lastexpr, LabelNode)
+    if @isdefined end_label
+        # clearly lastexpr must have been a come-from node (specifically, goto),
+        # so just need to push an empty basic-block here for the label numbering
+        # (later, we'll also push retstmt as the next statement)
+        push!(body.args, lastexpr)
+        push!(body.args, end_label)
+        lastexpr = nothing
+    elseif isa(lastexpr, LabelNode)
         push!(body.args, lastexpr)
         error_call = Expr(:call, GlobalRef(topmod, :error), "fatal error in type inference (lowering)")
         error_call.typ = Union{}
@@ -1681,6 +1744,15 @@ function ssavalue_increment(body::Expr, incr)
     end
     return body
 end
+ssavalue_increment(body::PiNode, incr) = PiNode(ssavalue_increment(body.val, incr), body.typ)
+function ssavalue_increment(body::PhiNode, incr)
+    values = Vector{Any}(uninitialized, length(body.values))
+    for i = 1:length(values)
+        isassigned(body.values, i) || continue
+        values[i] = ssavalue_increment(body.values[i], incr)
+    end
+    return PhiNode(body.edges, values)
+end
 
 function mk_getfield(texpr, i, T)
     e = Expr(:call, TOP_GETFIELD, texpr, i)
@@ -1967,7 +2039,7 @@ function add_slot!(src::CodeInfo, @nospecialize(typ), is_sa::Bool, name::Symbol=
     return SlotNumber(id)
 end
 
-function is_known_call(e::Expr, @nospecialize(func), src::CodeInfo, mod::Module)
+function is_known_call(e::Expr, @nospecialize(func), src, mod::Module)
     if e.head !== :call
         return false
     end
@@ -1975,7 +2047,7 @@ function is_known_call(e::Expr, @nospecialize(func), src::CodeInfo, mod::Module)
     return isa(f, Const) && f.val === func
 end
 
-function is_known_call_p(e::Expr, @nospecialize(pred), src::CodeInfo, mod::Module)
+function is_known_call_p(e::Expr, @nospecialize(pred), src, mod::Module)
     if e.head !== :call
         return false
     end
@@ -4126,9 +4198,21 @@ function reindex_labels!(sv::OptimizationState)
                 labelnum = mapping[el.args[1]::Int]
                 @assert labelnum !== 0
                 el.args[1] = labelnum
+            elseif el.head === :(=)
+                if isa(el.args[2], PhiNode)
+                    edges = Any[mapping[edge::Int + 1] - 1 for edge in el.args[2].edges]
+                    el.args[2] = PhiNode(convert(Vector{Any}, edges), el.args[2].values)
+                end
             end
         end
     end
+    if body[end] isa LabelNode
+        # we usually have a trailing label for the purposes of phi numbering
+        # this can now be deleted also if unused
+        if label_counter(body, false) < length(body)
+            pop!(body)
+        end
+    end
 end
 
 function return_type(@nospecialize(f), @nospecialize(t))
diff --git a/base/compiler/ssair/domtree.jl b/base/compiler/ssair/domtree.jl
new file mode 100644
index 0000000000000..745f87dface2e
--- /dev/null
+++ b/base/compiler/ssair/domtree.jl
@@ -0,0 +1,97 @@
+struct DomTreeNode
+    level::Int
+    children::Vector{Int}
+end
+DomTreeNode() = DomTreeNode(1, Vector{Int}())
+
+struct DomTree
+    idoms::Vector{Int}
+    nodes::Vector{DomTreeNode}
+end
+
+"""
+Check whether `bb1` dominates `bb2` in `domtree` (a block dominates itself).
+"""
+function dominates(domtree, bb1, bb2)
+    bb1 == bb2 && return true
+    target_level = domtree.nodes[bb1].level
+    source_level = domtree.nodes[bb2].level
+    source_level < target_level && return false  # bb2 sits above bb1 in the tree
+    for _ in (source_level-1):-1:target_level  # follow idoms until bb2 reaches bb1's level
+        bb2 = domtree.idoms[bb2]
+    end
+    return bb1 == bb2
+end
+
+function update_level!(domtree, node, level)
+    domtree[node] = DomTreeNode(level, domtree[node].children)
+    foreach(domtree[node].children) do child
+        update_level!(domtree, child, level+1)
+    end
+end
+
+struct DominatedBlocks
+    domtree::DomTree
+    worklist::Vector{Int}
+end
+
+function dominated(domtree::DomTree, root::Int)
+    doms = DominatedBlocks(domtree, Vector{Int}())
+    push!(doms.worklist, root)
+    doms
+end
+
+function start(doms::DominatedBlocks)
+    nothing
+end
+
+function next(doms::DominatedBlocks, state::Nothing)
+    bb = pop!(doms.worklist)
+    for dominated in doms.domtree.nodes[bb].children
+        push!(doms.worklist, dominated)
+    end
+    (bb, nothing)
+end
+
+function done(doms::DominatedBlocks, state::Nothing)
+    isempty(doms.worklist)
+end
+
+# Construct Dom Tree
+# Simple algorithm - TODO: Switch to the fast version (e.g. https://tanujkhattar.wordpress.com/2016/01/11/dominator-tree-of-a-directed-graph/)
+function construct_domtree(cfg)
+    dominators = IdSet{Int}[n == 1 ? IdSet{Int}(n) : IdSet{Int}(1:length(cfg.blocks)) for n = 1:length(cfg.blocks)]
+    changed = true
+    while changed
+        changed = false
+        for n = 2:length(cfg.blocks)
+            isempty(cfg.blocks[n].preds) && continue
+            firstp, rest = Iterators.peel(cfg.blocks[n].preds)
+            new_doms = copy(dominators[firstp])
+            for p in rest
+                intersect!(new_doms, dominators[p])
+            end
+            push!(new_doms, n)
+            changed |= (new_doms != dominators[n])
+            dominators[n] = new_doms
+        end
+    end
+    # Compute idoms
+    idoms = fill(0, length(cfg.blocks))
+    for i = 2:length(cfg.blocks)
+        for dom in dominators[i]
+            i == dom && continue
+            any(p->p !== i && p !== dom && dom in dominators[p], dominators[i]) && continue
+            idoms[i] = dom
+        end
+    end
+    # Compute children
+    domtree = DomTreeNode[DomTreeNode() for _ = 1:length(cfg.blocks)]
+    for (idx, idom) in Iterators.enumerate(idoms)
+        (idx == 1 || idom == 0) && continue
+        push!(domtree[idom].children, idx)
+    end
+    # Recursively set level
+    update_level!(domtree, 1, 1)
+    DomTree(idoms, domtree)
+end
diff --git a/base/compiler/ssair/driver.jl b/base/compiler/ssair/driver.jl
new file mode 100644
index 0000000000000..998b356fb9d84
--- /dev/null
+++ b/base/compiler/ssair/driver.jl
@@ -0,0 +1,75 @@
+include("compiler/ssair/ir.jl")
+include("compiler/ssair/domtree.jl")
+include("compiler/ssair/slot2ssa.jl")
+include("compiler/ssair/queries.jl")
+include("compiler/ssair/passes.jl")
+include("compiler/ssair/verify.jl")
+include("compiler/ssair/legacy.jl")
+
+macro show(s)
+    # return :(println($(QuoteNode(s)), " = ", $(esc(s))))
+end
+
+function normalize(@nospecialize(stmt), meta::Vector{Any}, inline::Vector{Any}, loc::RefValue{LineNumberNode})
+    if isa(stmt, Expr)
+        if stmt.head == :meta
+            args = stmt.args
+            if length(args) > 0
+                a1 = args[1]
+                if a1 === :push_loc
+                    push!(inline, stmt)
+                elseif a1 === :pop_loc
+                    n = (length(args) > 1) ? args[2]::Int : 1
+                    for i in 1:n
+                        isempty(inline) && break
+                        pop!(inline)
+                    end
+                else
+                    push!(meta, stmt)
+                end
+            end
+            return nothing
+        elseif stmt.head === :line
+            return nothing # deprecated - we shouldn't encounter this
+        elseif stmt.head === :gotoifnot
+            return GotoIfNot(stmt.args...)
+        elseif stmt.head === :return
+            return ReturnNode{Any}(stmt.args...)
+        end
+    elseif isa(stmt, LabelNode)
+        return nothing
+    elseif isa(stmt, LineNumberNode)
+        loc[] = stmt
+        return nothing
+    end
+    return stmt
+end
+
+function run_passes(ci::CodeInfo, mod::Module, nargs::Int, toploc::LineNumberNode)
+    ci.code = copy(ci.code)
+    meta = Any[]
+    lines = fill(LineNumberNode(0), length(ci.code))
+    let inline = Any[], loc = RefValue(toploc)
+        for i = 1:length(ci.code)
+            stmt = ci.code[i]
+            stmt = normalize(stmt, meta, inline, loc)
+            ci.code[i] = stmt
+            stmt === nothing || (lines[i] = loc[])
+        end
+    end
+    ci.code = strip_trailing_junk!(ci.code, lines)
+    cfg = compute_basic_blocks(ci.code)
+    defuse_insts = scan_slot_def_use(nargs, ci)
+    domtree = construct_domtree(cfg)
+    ir = let code = Any[nothing for _ = 1:length(ci.code)]
+             argtypes = ci.slottypes[1:(nargs+1)]
+            IRCode(code, lines, cfg, argtypes, mod, meta)
+        end
+    ir = construct_ssa!(ci, ir, domtree, defuse_insts, nargs)
+    ir = compact!(ir)
+    verify_ir(ir)
+    ir = type_lift_pass!(ir)
+    ir = compact!(ir)
+    verify_ir(ir)
+    return ir
+end
diff --git a/base/compiler/ssair/ir.jl b/base/compiler/ssair/ir.jl
new file mode 100644
index 0000000000000..c09bf9067c28b
--- /dev/null
+++ b/base/compiler/ssair/ir.jl
@@ -0,0 +1,574 @@
+Core.PhiNode() = PhiNode(Any[], Any[])
+@inline isexpr(@nospecialize(stmt), head::Symbol) = isa(stmt, Expr) && stmt.head === head
+
+struct Argument
+    n::Int
+end
+
+struct GotoIfNot
+    cond
+    dest::Int
+    GotoIfNot(@nospecialize(cond), dest::Int) = new(cond, dest)
+end
+
+struct ReturnNode{T}
+    val::T
+    ReturnNode{T}(@nospecialize(val)) where {T} = new{T}(val::T)
+    ReturnNode{T}() where {T} = new{T}()
+end
+
+"""
+Like UnitRange{Int}, but tolerates the `last` field temporarily being
+< `first` (this can happen during compacting).
+"""
+struct StmtRange <: AbstractUnitRange{Int}
+    first::Int
+    last::Int
+end
+first(r::StmtRange) = r.first
+last(r::StmtRange) = r.last
+start(r::StmtRange) = 0
+done(r::StmtRange, state) = r.last - r.first < state
+next(r::StmtRange, state) = (r.first + state, state + 1)
+
+struct BasicBlock
+    stmts::StmtRange
+    preds::Vector{Int}
+    succs::Vector{Int}
+end
+function BasicBlock(stmts::StmtRange)
+    BasicBlock(stmts, Int[], Int[])
+end
+function BasicBlock(old_bb, stmts)
+    BasicBlock(stmts, old_bb.preds, old_bb.succs)
+end
+
+struct CFG
+    blocks::Vector{BasicBlock}
+    index::Vector{Int}
+end
+
+function block_for_inst(index, inst)
+    searchsortedfirst(index, inst, lt=(<=))
+end
+block_for_inst(cfg::CFG, inst) = block_for_inst(cfg.index, inst)
+
+function compute_basic_blocks(stmts::Vector{Any})
+    jump_dests = IdSet{Int}(1)  # statement 1 always starts a block
+    # First go through and compute jump destinations
+    for (idx, stmt) in pairs(stmts)
+        # Terminators
+        if isa(stmt, Union{GotoIfNot, GotoNode, ReturnNode})
+            if isa(stmt, GotoIfNot)
+                push!(jump_dests, idx+1)
+                push!(jump_dests, stmt.dest)
+            else
+                # This is a fake dest to force the next stmt to start a bb
+                idx < length(stmts) && push!(jump_dests, idx+1)
+                if isa(stmt, GotoNode)
+                    push!(jump_dests, stmt.label)
+                end
+            end
+        end
+    end
+    bb_starts = sort(collect(jump_dests))
+    for i = length(stmts):-1:1  # find the last non-`nothing` statement
+        if stmts[i] !== nothing  # identity test: never dispatch `==` on arbitrary IR values
+            push!(bb_starts, i+1)  # end sentinel: one past the last real statement
+            break
+        end
+    end
+    # Compute ranges
+    basic_block_index = Int[]
+    blocks = BasicBlock[]
+    sizehint!(blocks, length(bb_starts)-1)
+    for (first, last) in Iterators.zip(bb_starts, Iterators.drop(bb_starts, 1))
+        push!(basic_block_index, first)
+        push!(blocks, BasicBlock(StmtRange(first, last-1)))
+    end
+    popfirst!(basic_block_index)  # index[k] is now the first statement of block k+1
+    # Compute successors/predecessors
+    for (num, b) in pairs(blocks)
+        terminator = stmts[last(b.stmts)]
+        # Conditional Branch
+        if isa(terminator, GotoIfNot)
+            block′ = block_for_inst(basic_block_index, terminator.dest)
+            push!(blocks[block′].preds, num)
+            push!(b.succs, block′)
+        end
+        if isa(terminator, GotoNode)
+            block′ = block_for_inst(basic_block_index, terminator.label)
+            push!(blocks[block′].preds, num)
+            push!(b.succs, block′)
+        elseif !isa(terminator, ReturnNode)
+            if num + 1 <= length(blocks)  # fall through into the next block, if any
+                push!(blocks[num+1].preds, num)
+                push!(b.succs, num+1)
+            end
+        end
+    end
+    CFG(blocks, basic_block_index)
+end
+
+function first_insert_for_bb(code, cfg, block)
+    for idx in cfg.blocks[block].stmts
+        stmt = code[idx]
+        if !isa(stmt, LabelNode) && !isa(stmt, PhiNode)
+            return idx
+        end
+    end
+end
+
+
+const NewNode = Tuple{Int, Any, Any, LineNumberNode}
+
+struct IRCode
+    stmts::Vector{Any}
+    types::Vector{Any}
+    lines::Vector{LineNumberNode}
+    argtypes::Vector{Any}
+    cfg::CFG
+    new_nodes::Vector{NewNode}
+    mod::Module
+    meta::Vector{Any}
+
+    function IRCode(stmts::Vector{Any}, lines::Vector{LineNumberNode}, cfg::CFG, argtypes::Vector{Any}, mod::Module, meta::Vector{Any})
+        return new(stmts, Any[], lines, argtypes, cfg, NewNode[], mod, meta)
+    end
+    function IRCode(ir::IRCode, stmts::Vector{Any}, types::Vector{Any}, lines::Vector{LineNumberNode}, cfg::CFG, new_nodes::Vector{NewNode})
+        return new(stmts, types, lines, ir.argtypes, cfg, new_nodes, ir.mod, ir.meta)
+    end
+end
+
+function getindex(x::IRCode, s::SSAValue)
+    if s.id <= length(x.stmts)
+        return x.stmts[s.id]
+    else
+        return x.new_nodes[s.id - length(x.stmts)][3]
+    end
+end
+
+struct OldSSAValue
+    id::Int
+end
+
+struct NewSSAValue
+    id::Int
+end
+
+mutable struct UseRefIterator
+    stmt::Any
+end
+getindex(it::UseRefIterator) = it.stmt
+
+struct UseRef
+    urs::UseRefIterator
+    use::Int
+end
+
+struct OOBToken
+end
+
+struct UndefToken
+end
+
+function getindex(x::UseRef)
+    stmt = x.urs.stmt
+    if isa(stmt, Expr) && stmt.head === :(=)
+        rhs = stmt.args[2]
+        if isa(rhs, Expr) && is_relevant_expr(rhs)
+            x.use > length(rhs.args) && return OOBToken()
+            return rhs.args[x.use]
+        end
+        x.use == 1 || return OOBToken()
+        return rhs
+    elseif isa(stmt, Expr) && is_relevant_expr(stmt)
+        x.use > length(stmt.args) && return OOBToken()
+        return stmt.args[x.use]
+    elseif isa(stmt, GotoIfNot)
+        x.use == 1 || return OOBToken()
+        return stmt.cond
+    elseif isa(stmt, ReturnNode) || isa(stmt, PiNode)
+        isdefined(stmt, :val) || return OOBToken()
+        x.use == 1 || return OOBToken()
+        return stmt.val
+    elseif isa(stmt, PhiNode)
+        x.use > length(stmt.values) && return OOBToken()
+        isassigned(stmt.values, x.use) || return UndefToken()
+        return stmt.values[x.use]
+    else
+        return OOBToken()
+    end
+end
+
+function is_relevant_expr(e::Expr)
+    return e.head in (:call, :invoke, :new, :(=), :(&),
+                      :gc_preserve_begin, :gc_preserve_end,
+                      :foreigncall, :isdefined, :copyast,
+                      :undefcheck, :throw_undef_if_not)
+end
+
+# Overwrite the operand that `x` refers to with `v` and return `x`.
+# `Expr` and `PhiNode` statements are updated in place; `GotoIfNot`,
+# `ReturnNode` and `PiNode` statements are rebuilt and stored back into the
+# owning `UseRefIterator`. Throws `BoundsError` when `x` does not refer to an
+# existing (assigned) operand.
+function setindex!(x::UseRef, @nospecialize(v))
+    stmt = x.urs.stmt
+    use = x.use
+    if isa(stmt, Expr)
+        if stmt.head === :(=)
+            rhs = stmt.args[2]
+            if isa(rhs, Expr) && is_relevant_expr(rhs)
+                use <= length(rhs.args) || throw(BoundsError())
+                rhs.args[use] = v
+            else
+                # The whole right-hand side is the one and only operand
+                use == 1 || throw(BoundsError())
+                stmt.args[2] = v
+            end
+        elseif is_relevant_expr(stmt)
+            use <= length(stmt.args) || throw(BoundsError())
+            stmt.args[use] = v
+        else
+            throw(BoundsError())
+        end
+    elseif isa(stmt, GotoIfNot)
+        use == 1 || throw(BoundsError())
+        x.urs.stmt = GotoIfNot(v, stmt.dest)
+    elseif isa(stmt, ReturnNode)
+        use == 1 || throw(BoundsError())
+        x.urs.stmt = typeof(stmt)(v)
+    elseif isa(stmt, PiNode)
+        use == 1 || throw(BoundsError())
+        x.urs.stmt = typeof(stmt)(v, stmt.typ)
+    elseif isa(stmt, PhiNode)
+        in_range = use <= length(stmt.values)
+        (in_range && isassigned(stmt.values, use)) || throw(BoundsError())
+        stmt.values[use] = v
+    else
+        throw(BoundsError())
+    end
+    return x
+end
+
+# Return an iterator over the operands of statement `x`, or the empty tuple
+# when `x` is not a statement kind that carries operands.
+function userefs(@nospecialize(x))
+    has_operands = (isa(x, Expr) && is_relevant_expr(x)) ||
+        isa(x, Union{GotoIfNot, ReturnNode, PiNode, PhiNode})
+    return has_operands ? UseRefIterator(x) : ()
+end
+
+# Iteration protocol for `UseRefIterator`; the state is the operand index.
+start(it::UseRefIterator) = 1
+# Yield the `UseRef` at `use`, stepping over unassigned phi values.
+function next(it::UseRefIterator, use)
+    ref = UseRef(it, use)
+    while ref[] === UndefToken()
+        use += 1
+        ref = UseRef(it, use)
+    end
+    return ref, use + 1
+end
+# Iteration is finished once the next candidate operand is out of bounds.
+function done(it::UseRefIterator, use)
+    ref, _ = next(it, use)
+    return ref[] === OOBToken()
+end
+
+# Add to `used` the id of every `SSAValue` that `stmt` refers to, either as
+# the statement itself or as one of its operands.
+function scan_ssa_use!(used::IdSet{Int64}, @nospecialize(stmt))
+    isa(stmt, SSAValue) && push!(used, stmt.id)
+    for ref in userefs(stmt)
+        operand = ref[]
+        isa(operand, SSAValue) || continue
+        push!(used, operand.id)
+    end
+    return nothing
+end
+
+# Replace every `SSAValue` operand of `stmt` by `f(operand)` and return the
+# resulting statement. Statements without operands are returned unchanged.
+function ssamap(f, @nospecialize(stmt))
+    refs = userefs(stmt)
+    refs === () && return stmt
+    for ref in refs
+        operand = ref[]
+        isa(operand, SSAValue) || continue
+        ref[] = f(operand)
+    end
+    # The iterator holds the (possibly rebuilt) statement
+    return refs[]
+end
+
+# Call `f` on every `SSAValue` operand of `stmt`.
+function foreachssa(f, @nospecialize(stmt))
+    for ref in userefs(stmt)
+        operand = ref[]
+        isa(operand, SSAValue) || continue
+        f(operand)
+    end
+    return nothing
+end
+
+# Queue `val` (of type `typ`) for insertion at statement position `pos`,
+# reusing the line info of the statement currently at `pos`. Returns the
+# `SSAValue` by which the pending node can be referenced.
+function insert_node!(ir::IRCode, pos::Int, @nospecialize(typ), @nospecialize(val))
+    push!(ir.new_nodes, (pos, typ, val, ir.lines[pos]))
+    # Pending nodes are numbered after all existing statements
+    new_id = length(ir.stmts) + length(ir.new_nodes)
+    return SSAValue(new_id)
+end
+
+# For bootstrapping
+# Return the permutation of `1:length(v)` that puts `v` in ascending order.
+function my_sortperm(v)
+    perm = Int[i for i = 1:length(v)]
+    return sort!(perm, Sort.DEFAULT_UNSTABLE, Order.Perm(Sort.Forward,v))
+end
+
+# State of an incremental compaction of `ir`: statements are copied one by one
+# into `result` (pending `ir.new_nodes` are spliced in at their positions)
+# while SSA values are renumbered and use counts are collected.
+mutable struct IncrementalCompact
+    ir::IRCode
+    # Compacted statements and, index-aligned, their types and line nodes
+    result::Vector{Any}
+    result_types::Vector{Any}
+    result_lines::Vector{LineNumberNode}
+    # Old SSA number -> replacement (new SSAValue, forwarded value or `nothing`)
+    ssa_rename::Vector{Any}
+    # Use count per SSA number of the compacted code
+    used_ssas::Vector{Int}
+    # Indices into `result` of phi nodes that still contain `OldSSAValue`s
+    late_fixup::Vector{Int}
+    # This could be Stateful, but bootstrapping doesn't like that
+    # `perm` orders `ir.new_nodes` by insertion position; `new_nodes_idx` is
+    # the next entry of `perm` to consume
+    perm::Vector{Int}
+    new_nodes_idx::Int
+    # Next old statement to process
+    idx::Int
+    # Next free slot in `result`
+    result_idx::Int
+    function IncrementalCompact(code::IRCode)
+        perm = my_sortperm(Int[code.new_nodes[i][1] for i in 1:length(code.new_nodes)])
+        # Upper bound on the output size: every statement plus every new node
+        new_len = length(code.stmts) + length(code.new_nodes)
+        result = Array{Any}(uninitialized, new_len)
+        result_types = Array{Any}(uninitialized, new_len)
+        result_lines = Array{LineNumberNode}(uninitialized, new_len)
+        # Start out with the identity renaming
+        ssa_rename = Any[SSAValue(i) for i = 1:new_len]
+        used_ssas = fill(0, new_len)
+        late_fixup = Vector{Int}()
+        return new(code, result, result_types, result_lines, ssa_rename, used_ssas, late_fixup, perm, 1, 1, 1)
+    end
+end
+
+# Indexable view of statement types during compaction; see
+# `getindex(::TypesView, idx)` for how an index is resolved.
+struct TypesView
+    compact::IncrementalCompact
+end
+# `types(compact)[idx]` looks up the type of statement `idx`.
+types(compact::IncrementalCompact) = TypesView(compact)
+
+# Statement `idx`: taken from the compacted output when `idx` has already been
+# emitted, otherwise from the original statement list.
+function getindex(compact::IncrementalCompact, idx)
+    already_emitted = idx < compact.result_idx
+    return already_emitted ? compact.result[idx] : compact.ir.stmts[idx]
+end
+
+# Replace statement `idx` with `v` and return `v`.
+#
+# If `idx` has already been emitted into the compacted output, `used_ssas` is
+# kept consistent: every SSA use of the old statement is subtracted and every
+# SSA use of `v` is added. Both a bare `SSAValue` statement and `SSAValue`
+# operands inside a statement are accounted for; counting only a bare
+# `SSAValue` would leave operands of a new expression uncounted and let the
+# dead-code elimination in `finish` erase definitions that are still in use.
+# Statements that have not been emitted yet are simply overwritten in the
+# original statement list.
+function setindex!(compact::IncrementalCompact, v, idx)
+    if idx < compact.result_idx
+        old = compact.result[idx]
+        # Kill count for current uses
+        isa(old, SSAValue) && (compact.used_ssas[old.id] -= 1)
+        for ops in userefs(old)
+            val = ops[]
+            isa(val, SSAValue) && (compact.used_ssas[val.id] -= 1)
+        end
+        # Add count for new uses
+        isa(v, SSAValue) && (compact.used_ssas[v.id] += 1)
+        for ops in userefs(v)
+            val = ops[]
+            isa(val, SSAValue) && (compact.used_ssas[val.id] += 1)
+        end
+        return compact.result[idx] = v
+    else
+        return compact.ir.stmts[idx] = v
+    end
+end
+
+# Type of statement `idx`: from the compacted output when `idx` has already
+# been emitted, otherwise from the original IR.
+function getindex(view::TypesView, idx)
+    compact = view.compact
+    already_emitted = idx < compact.result_idx
+    return already_emitted ? compact.result_types[idx] : compact.ir.types[idx]
+end
+
+# maybe use expr_type?
+# Type of `value` in the context of `ir`; anything that is not an SSA value,
+# global reference or argument is typed as `typeof(value)`.
+function value_typ(ir::IRCode, value)
+    if isa(value, SSAValue)
+        return ir.types[value.id]
+    elseif isa(value, GlobalRef)
+        return abstract_eval_global(value.mod, value.name)
+    elseif isa(value, Argument)
+        return ir.argtypes[value.n]
+    end
+    # TODO: isa QuoteNode, etc.
+    return typeof(value)
+end
+
+# Same as `value_typ(::IRCode, value)`, but SSA types are looked up through
+# the compaction-aware `types` view.
+function value_typ(ir::IncrementalCompact, value)
+    if isa(value, SSAValue)
+        return types(ir)[value.id]
+    elseif isa(value, GlobalRef)
+        return abstract_eval_global(value.mod, value.name)
+    elseif isa(value, Argument)
+        return ir.ir.argtypes[value.n]
+    end
+    # TODO: isa QuoteNode, etc.
+    return typeof(value)
+end
+
+
+# Iteration state is `(old statement index, active basic block, result index)`.
+start(compact::IncrementalCompact) = (1,1,1)
+# Finished once all old statements and all pending new nodes are consumed.
+function done(compact::IncrementalCompact, (idx, _a, _b)::Tuple{Int, Int, Int})
+    stmts_exhausted = idx > length(compact.ir.stmts)
+    new_nodes_exhausted = compact.new_nodes_idx > length(compact.perm)
+    return stmts_exhausted && new_nodes_exhausted
+end
+
+# Emit old statement `stmt` (old SSA number `idx`) into `result` at
+# `result_idx` and record its renaming in `ssa_rename`.
+#
+# Returns the next free result index: `result_idx + 1` when a statement was
+# emitted, `result_idx` unchanged when the statement was dropped (`nothing`,
+# a bare SSA value, or any other value, which is forwarded to its users via
+# `ssa_rename` instead). `processed_idx` is the highest old statement index
+# whose renaming is already final; phi operands beyond it are deferred.
+function process_node!(result::Vector{Any}, result_idx::Int, ssa_rename::Vector{Any},
+        late_fixup::Vector{Int}, used_ssas::Vector{Int}, @nospecialize(stmt),
+        idx::Int, processed_idx::Int)
+    # Default renaming; overwritten below for statements that are dropped
+    ssa_rename[idx] = SSAValue(result_idx)
+    if stmt === nothing
+        ssa_rename[idx] = stmt
+    elseif isa(stmt, GotoNode) || isa(stmt, GlobalRef)
+        # Copied verbatim, no operands are renumbered
+        result[result_idx] = stmt
+        result_idx += 1
+    elseif isa(stmt, Expr) || isa(stmt, PiNode) || isa(stmt, GotoIfNot) || isa(stmt, ReturnNode)
+        result[result_idx] = renumber_ssa!(stmt, ssa_rename, true, used_ssas)
+        result_idx += 1
+    elseif isa(stmt, PhiNode)
+        values = Vector{Any}(uninitialized, length(stmt.values))
+        for i = 1:length(stmt.values)
+            # Unassigned incoming values stay unassigned in the copy
+            isassigned(stmt.values, i) || continue
+            val = stmt.values[i]
+            if isa(val, SSAValue)
+                if val.id > processed_idx
+                    # Refers to a statement not processed yet: mark it as an
+                    # old SSA value and let `finish` resolve it
+                    push!(late_fixup, result_idx)
+                    val = OldSSAValue(val.id)
+                else
+                    val = renumber_ssa!(val, ssa_rename, true, used_ssas)
+                end
+            end
+            values[i] = val
+        end
+        result[result_idx] = PhiNode(stmt.edges, values)
+        result_idx += 1
+    elseif isa(stmt, SSAValue)
+        # identity assign, replace uses of this ssa value with its result
+        stmt = ssa_rename[stmt.id]
+        ssa_rename[idx] = stmt
+    else
+        # Constant assign, replace uses of this ssa value with its result
+        ssa_rename[idx] = stmt
+    end
+    return result_idx
+end
+# Convenience method that pulls the working arrays out of `compact`.
+function process_node!(compact::IncrementalCompact, result_idx::Int, @nospecialize(stmt), idx::Int, processed_idx::Int)
+    return process_node!(compact.result, result_idx, compact.ssa_rename,
+        compact.late_fixup, compact.used_ssas, stmt, idx, processed_idx)
+end
+
+# Advance the compaction by one emitted statement.
+#
+# The state is `(idx, active_bb, old_result_idx)`: the next old statement to
+# look at, the basic block it belongs to, and the next free slot in the
+# compacted output. Returns `((result_index, stmt), new_state)`. Pending new
+# nodes registered at position `idx` are emitted before the old statement at
+# `idx`. Statements that `process_node!` drops are skipped by recursing, so
+# every returned item corresponds to a statement that was actually emitted.
+function next(compact::IncrementalCompact, (idx, active_bb, old_result_idx)::Tuple{Int, Int, Int})
+    if length(compact.result) < old_result_idx
+        resize!(compact.result, old_result_idx)
+        resize!(compact.result_types, old_result_idx)
+        resize!(compact.result_lines, old_result_idx)
+    end
+    bb = compact.ir.cfg.blocks[active_bb]
+    if compact.new_nodes_idx <= length(compact.perm) && compact.ir.new_nodes[compact.perm[compact.new_nodes_idx]][1] == idx
+        new_idx = compact.perm[compact.new_nodes_idx]
+        compact.new_nodes_idx += 1
+        _, typ, new_node, new_line = compact.ir.new_nodes[new_idx]
+        # New nodes are numbered after all original statements
+        new_idx += length(compact.ir.stmts)
+        compact.result_types[old_result_idx] = typ
+        compact.result_lines[old_result_idx] = new_line
+        result_idx = process_node!(compact, old_result_idx, new_node, new_idx, idx)
+        # The new node was folded away: retry at the same position. The state
+        # must be the full three-element tuple; no `next` method accepts a
+        # two-element state.
+        (old_result_idx == result_idx) && return next(compact, (idx, active_bb, result_idx))
+        compact.result_idx = result_idx
+        # The old statement at `idx` is still to be processed. Use `idx`
+        # rather than `compact.idx`, which lags behind when preceding old
+        # statements were skipped without emitting anything.
+        return (old_result_idx, compact.result[old_result_idx]), (idx, active_bb, compact.result_idx)
+    end
+    # This will get overwritten in future iterations if
+    # result_idx is not, incremented, but that's ok and expected
+    compact.result_types[old_result_idx] = compact.ir.types[idx]
+    compact.result_lines[old_result_idx] = compact.ir.lines[idx]
+    result_idx = process_node!(compact, old_result_idx, compact.ir.stmts[idx], idx, idx)
+    if idx == last(bb.stmts)
+        # If this was the last statement in the BB and we decided to skip it, insert a
+        # dummy `nothing` node, to prevent changing the structure of the CFG
+        if result_idx == first(bb.stmts)
+            compact.result[old_result_idx] = nothing
+            result_idx = old_result_idx + 1
+        end
+        # Close the current block at its compacted end ...
+        compact.ir.cfg.blocks[active_bb] = BasicBlock(bb, StmtRange(first(bb.stmts), result_idx-1))
+        active_bb += 1
+        # ... and let the following block start at the next compacted index
+        if active_bb <= length(compact.ir.cfg.blocks)
+            new_bb = compact.ir.cfg.blocks[active_bb]
+            compact.ir.cfg.blocks[active_bb] = BasicBlock(new_bb,
+                StmtRange(result_idx, last(new_bb.stmts)))
+        end
+    end
+    # Nothing was emitted for this statement: move on to the next one
+    (old_result_idx == result_idx) && return next(compact, (idx + 1, active_bb, result_idx))
+    compact.idx = idx + 1
+    compact.result_idx = result_idx
+    if !isassigned(compact.result, old_result_idx)
+        @assert false
+    end
+    return (old_result_idx, compact.result[old_result_idx]), (compact.idx, active_bb, compact.result_idx)
+end
+
+# If the compacted statement `idx` is effect free, release its uses and
+# replace it by `nothing`. Operands whose use count is about to drop to zero
+# and that were defined before `idx` are pushed onto `extra_worklist`.
+function maybe_erase_unused!(extra_worklist, compact, idx)
+    stmt = compact.result[idx]
+    stmt_effect_free(stmt, compact.ir, compact.ir.mod) || return
+    for ops in userefs(stmt)
+        val = ops[]
+        isa(val, SSAValue) || continue
+        if compact.used_ssas[val.id] == 1 && val.id < idx
+            push!(extra_worklist, val.id)
+        end
+        compact.used_ssas[val.id] -= 1
+    end
+    compact.result[idx] = nothing
+end
+
+# Complete a compaction and return the resulting `IRCode`.
+# Resolves the deferred phi operands, trims the result arrays to the number of
+# emitted statements, fixes up the range of the last basic block, removes
+# unused effect-free statements, and rebuilds the CFG.
+function finish(compact::IncrementalCompact)
+    # Phi nodes recorded in `late_fixup` still carry `OldSSAValue` operands;
+    # rebuild them with the final renaming and count the resulting uses
+    for idx in compact.late_fixup
+        stmt = compact.result[idx]::PhiNode
+        values = Vector{Any}(uninitialized, length(stmt.values))
+        for i = 1:length(stmt.values)
+            isassigned(stmt.values, i) || continue
+            val = stmt.values[i]
+            if isa(val, OldSSAValue)
+                val = compact.ssa_rename[val.id]
+                if isa(val, SSAValue)
+                    compact.used_ssas[val.id] += 1
+                end
+            end
+            values[i] = val
+        end
+        compact.result[idx] = PhiNode(stmt.edges, values)
+    end
+    # Record this somewhere?
+    result_idx = compact.result_idx
+    # `result_idx` is the next free slot, so `result_idx-1` entries are live
+    resize!(compact.result, result_idx-1)
+    resize!(compact.result_types, result_idx-1)
+    resize!(compact.result_lines, result_idx-1)
+    bb = compact.ir.cfg.blocks[end]
+    compact.ir.cfg.blocks[end] = BasicBlock(bb,
+                StmtRange(first(bb.stmts), result_idx-1))
+    # Perform simple DCE for unused values
+    extra_worklist = Int[]
+    for (idx, nused) in Iterators.enumerate(compact.used_ssas)
+        idx >= result_idx && break
+        nused == 0 || continue
+        maybe_erase_unused!(extra_worklist, compact, idx)
+    end
+    # Erasing a statement may have left its operands unused in turn
+    while !isempty(extra_worklist)
+        maybe_erase_unused!(extra_worklist, compact, pop!(extra_worklist))
+    end
+    # Second CFG argument: first statement index of every block but the first
+    cfg = CFG(compact.ir.cfg.blocks, Int[first(bb.stmts) for bb in compact.ir.cfg.blocks[2:end]])
+    return IRCode(compact.ir, compact.result, compact.result_types, compact.result_lines, cfg, NewNode[])
+end
+
+# Compact `code`: splice in pending new nodes, renumber SSA values and drop
+# dead statements. Returns the compacted `IRCode`.
+function compact!(code::IRCode)
+    compact = IncrementalCompact(code)
+    # Just run through the iterator without any processing
+    state = start(compact)
+    while true
+        done(compact, state) && break
+        state = next(compact, state)[2]
+    end
+    return finish(compact)
+end
+
+# Iterator over the statements of `ir` yielding `(bb, idx, stmt)` triples,
+# where `bb` is the index of the basic block containing statement `idx`.
+struct BBIdxStmt
+    ir::IRCode
+end
+
+bbidxstmt(ir) = BBIdxStmt(ir)
+
+# The state is `(statement index, basic block index)`.
+start(x::BBIdxStmt) = (1,1)
+done(x::BBIdxStmt, (idx, bb)) = idx > length(x.ir.stmts)
+function next(x::BBIdxStmt, (idx, bb))
+    # Move on to the following block after the last statement of this one
+    ends_block = idx == last(x.ir.cfg.blocks[bb].stmts)
+    next_bb = ends_block ? bb + 1 : bb
+    return (bb, idx, x.ir.stmts[idx]), (idx + 1, next_bb)
+end
diff --git a/base/compiler/ssair/legacy.jl b/base/compiler/ssair/legacy.jl
new file mode 100644
index 0000000000000..8e980bee1fec7
--- /dev/null
+++ b/base/compiler/ssair/legacy.jl
@@ -0,0 +1,143 @@
+# Replace every `SSAValue` or `Argument` operand of `stmt` by `f(operand)` and
+# return the resulting statement. Statements without operands are returned
+# unchanged.
+function ssaargmap(f, @nospecialize(stmt))
+    refs = userefs(stmt)
+    refs === () && return stmt
+    for ref in refs
+        operand = ref[]
+        isa(operand, Union{SSAValue, Argument}) || continue
+        ref[] = f(operand)
+    end
+    # The iterator holds the (possibly rebuilt) statement
+    return refs[]
+end
+
+# Translate the SSA-form `code` back into the statement-list form stored in
+# `ci` and return `ci`.
+#
+# `nargs` is the number of arguments (slots beyond `nargs+1` are dropped) and
+# `topline` the line node considered current at the start, so that it is not
+# emitted again. Used SSA values are renumbered densely, `Argument`s become
+# `SlotNumber`s, `LabelNode`s are inserted at jump targets and after blocks
+# that phi nodes refer to, and jump destinations / phi edges are rewritten from
+# basic block numbers to positions in the new statement list.
+function replace_code!(ci::CodeInfo, code::IRCode, nargs::Int, topline::LineNumberNode)
+    # Pending insertions must be materialized before translating
+    if !isempty(code.new_nodes)
+        code = compact!(code)
+    end
+    # All but the first `nargs` slots will now be unused
+    resize!(ci.slottypes, nargs+1)
+    resize!(ci.slotnames, nargs+1)
+    resize!(ci.slotflags, nargs+1)
+    # For every used SSAValues, we register one base format ssa value
+    used = IdSet{Int}()
+    foreach(stmt->scan_ssa_use!(used, stmt), code.stmts)
+    mapping = IdDict{Int, Int}()
+    n = 0
+    resize!(ci.ssavaluetypes, length(used))
+    for ssa in sort(Int[x for x in used])
+        # The new numbers stored in `mapping` start at 0, while
+        # `ssavaluetypes` is indexed from 1
+        mapping[ssa] = n
+        n += 1
+        ci.ssavaluetypes[n] = code.types[ssa]
+    end
+    # Find all jump targets (we need to insert LabelNodes for them) and
+    # jump origins (we insert a label node on the statement after, to
+    # make sure we can track them)
+    dest_blocks = IdSet{Int}()
+    jump_origins = IdSet{Int}()
+    for stmt in code.stmts
+        if isa(stmt, GotoNode)
+            push!(dest_blocks, stmt.label)
+        elseif isa(stmt, GotoIfNot)
+            push!(dest_blocks, stmt.dest)
+        elseif isa(stmt, PhiNode)
+            for edge in stmt.edges
+                push!(jump_origins, edge)
+            end
+        end
+    end
+    cfg = code.cfg
+    # first statement of a jump target block => block number
+    block_start = IdDict{Int, Int}(first(cfg.blocks[x].stmts)=>x for x in dest_blocks)
+    # statements directly following a block that some phi node refers to
+    comefrom_labels = IdSet{Int}(last(cfg.blocks[x].stmts)+1 for x in jump_origins)
+    # last statement of a block => block number
+    block_terminators = IdDict{Int, Int}(last(block.stmts)=>i for (i,block) in pairs(cfg.blocks))
+    local rename
+    let mapping = mapping
+        # Map an operand to its legacy form; anything else is returned as is
+        function rename(@nospecialize(val))
+            if isa(val, SSAValue)
+                if haskey(mapping, val.id)
+                    return SSAValue(mapping[val.id])
+                end
+            elseif isa(val, Argument)
+                return SlotNumber(val.n)
+            end
+            return val
+        end
+    end
+    # Now translate the code
+    new_code = Vector{Any}()
+    append!(new_code, code.meta)
+    # block number => position of its LabelNode in `new_code`
+    label_mapping = IdDict{Int, Int}()
+    # block number => position of its last statement in `new_code`
+    terminator_mapping = IdDict{Int, Int}()
+    # positions in `new_code` whose block references need rewriting afterwards
+    fixup = Int[]
+    for (idx, stmt) in pairs(code.stmts)
+        line = code.lines[idx]
+        # push labels first
+        if haskey(block_start, idx)
+            push!(new_code, LabelNode(length(new_code) + 1))
+            label_mapping[block_start[idx]] = length(new_code)
+        elseif idx in comefrom_labels
+            push!(new_code, LabelNode(length(new_code) + 1))
+        end
+        # then metadata
+        if !(line.file === nothing && line.line === 0) && !(line === topline)
+            push!(new_code, line)
+            topline = line
+        end
+        # record if this'll need a fixup after stmt number
+        if isa(stmt, GotoIfNot)
+            new_stmt = Expr(:gotoifnot, rename(stmt.cond), stmt.dest)
+            push!(fixup, length(new_code)+1)
+        elseif isa(stmt, ReturnNode)
+            if isdefined(stmt, :val)
+                new_stmt = Expr(:return, rename(stmt.val))
+            else
+                # Unreachable, so no issue with this
+                new_stmt = nothing
+            end
+        elseif isa(stmt, SSAValue)
+            new_stmt = rename(stmt)
+        elseif isa(stmt, PhiNode)
+            new_stmt = ssaargmap(rename, stmt)
+            push!(fixup, length(new_code)+1)
+        elseif isa(stmt, GotoNode)
+            push!(fixup, length(new_code)+1)
+            new_stmt = stmt
+        else
+            new_stmt = ssaargmap(rename, stmt)
+        end
+        # Statements whose value is used become assignments to their new SSA value
+        if haskey(mapping, idx)
+            new_stmt = Expr(:(=), SSAValue(mapping[idx]), new_stmt)
+        end
+        # record fixup targets
+        if haskey(block_terminators, idx)
+            terminator_mapping[block_terminators[idx]] = length(new_code)+1
+        end
+        # and finally, record the new new statement
+        push!(new_code, new_stmt)
+    end
+    # All positions are known now: rewrite block numbers to statement positions
+    for i in fixup
+        val = new_code[i]
+        # Look through the assignment wrapper added above, if any
+        isassign = isexpr(val, :(=))
+        if isassign
+            val = val.args[2]
+        end
+        if isa(val, PhiNode)
+            # Translate from BB edges to statement edges
+            edges = Any[terminator_mapping[edge] for edge in val.edges]
+            val = PhiNode(convert(Vector{Any}, edges), val.values)
+        elseif isa(val, GotoNode)
+            val = GotoNode(label_mapping[val.label])
+        elseif isexpr(val, :gotoifnot)
+            val = Expr(:gotoifnot, val.args[1], label_mapping[val.args[2]])
+        else
+            #@show val
+            error()
+        end
+        if isassign
+            new_code[i].args[2] = val
+        else
+            new_code[i] = val
+        end
+    end
+    ci.code = new_code
+    return ci
+end
diff --git a/base/compiler/ssair/passes.jl b/base/compiler/ssair/passes.jl
new file mode 100644
index 0000000000000..eb62f76c597b5
--- /dev/null
+++ b/base/compiler/ssair/passes.jl
@@ -0,0 +1,84 @@
+# Lower `:isdefined` and `:undefcheck` expressions on SSA values.
+#
+# An SSA value can only be undefined if it comes (possibly through pi nodes)
+# from a phi node with an unassigned incoming value. For such values a
+# parallel `Bool`-typed phi node is inserted that records whether the value is
+# defined on each incoming edge. `:isdefined` is then replaced by a reference
+# to that phi node and `:undefcheck` by a `:throw_undef_if_not` expression
+# guarded by it. Checks on values that do not originate from a phi node are
+# folded to `true` (`:isdefined`) or removed (`:undefcheck`). An `:undefcheck`
+# whose operand is not an SSA value is removed; an `:isdefined` whose operand
+# is not an SSA value is left untouched. Returns `ir`.
+function type_lift_pass!(ir::IRCode)
+    # defining phi statement => SSA value of its lifted `Bool` phi
+    lifted_undef = IdDict{Int, SSAValue}()
+    for (idx, stmt) in pairs(ir.stmts)
+        if stmt isa Expr && (stmt.head === :isdefined || stmt.head === :undefcheck)
+            val = (stmt.head === :isdefined) ? stmt.args[1] : stmt.args[2]
+            # undef can only show up by being introduced in a phi
+            # node, so lift all phi nodes that have maybe undef values
+            processed = IdDict{Int, SSAValue}()
+            if !isa(val, SSAValue)
+                if stmt.head === :undefcheck
+                    ir.stmts[idx] = nothing
+                end
+                continue
+            end
+            # Look through pi nodes to find the defining statement. A pi node
+            # whose operand is not an SSA value ends the chain.
+            stmt_id = val.id
+            def = ir.stmts[stmt_id]
+            while isa(def, PiNode) && isa(def.val, SSAValue)
+                stmt_id = def.val.id
+                def = ir.stmts[stmt_id]
+            end
+            if !isa(def, PhiNode)
+                if stmt.head === :isdefined
+                    ir.stmts[idx] = true
+                else
+                    ir.stmts[idx] = nothing
+                end
+                continue
+            end
+            if !haskey(lifted_undef, stmt_id)
+                # Work items are (phi statement, lifted phi using it, operand slot).
+                # Seed with the resolved phi `stmt_id`, not `val.id`: the
+                # latter may be a pi node, which has no `edges`.
+                worklist = Tuple{Int, SSAValue, Int}[(stmt_id, SSAValue(0), 0)]
+                is_root = true
+                while !isempty(worklist)
+                    item, which, use = pop!(worklist)
+                    def = ir.stmts[item]
+                    edges = copy(def.edges)
+                    values = Vector{Any}(uninitialized, length(edges))
+                    new_phi = insert_node!(ir, item, Bool, PhiNode(edges, values))
+                    processed[item] = new_phi
+                    if is_root
+                        lifted_undef[stmt_id] = new_phi
+                        is_root = false
+                    end
+                    for i = 1:length(edges)
+                        if !isassigned(def.values, i)
+                            # Undefined on this edge
+                            val = false
+                        elseif !isa(def.values[i], SSAValue)
+                            val = true
+                        else
+                            id = def.values[i].id
+                            if !isa(ir.types[id], MaybeUndef)
+                                val = true
+                            else
+                                src = ir.stmts[id]
+                                while isa(src, PiNode) && isa(src.val, SSAValue)
+                                    id = src.val.id
+                                    src = ir.stmts[id]
+                                end
+                                if isa(src, PhiNode)
+                                    if haskey(processed, id)
+                                        val = processed[id]
+                                    else
+                                        # Lift the nested phi later; its result is
+                                        # patched into slot `i` of `new_phi` then
+                                        push!(worklist, (id, new_phi, i))
+                                        continue
+                                    end
+                                else
+                                    val = true
+                                end
+                            end
+                        end
+                        values[i] = val
+                    end
+                    if which !== SSAValue(0)
+                        ir[which].values[use] = new_phi
+                    end
+                end
+            end
+            if stmt.head === :isdefined
+                ir.stmts[idx] = lifted_undef[stmt_id]
+            else
+                ir.stmts[idx] = Expr(:throw_undef_if_not, stmt.args[1], lifted_undef[stmt_id])
+            end
+        end
+    end
+    ir
+end
diff --git a/base/compiler/ssair/queries.jl b/base/compiler/ssair/queries.jl
new file mode 100644
index 0000000000000..70a7757f8ad94
--- /dev/null
+++ b/base/compiler/ssair/queries.jl
@@ -0,0 +1,9 @@
+# Whether `stmt` can be removed when its value is unused. Pi and phi nodes
+# always can, control flow statements never can; everything else is deferred
+# to `statement_effect_free`.
+function stmt_effect_free(@nospecialize(stmt), src::IRCode, mod::Module)
+    if isa(stmt, PiNode) || isa(stmt, PhiNode)
+        return true
+    elseif isa(stmt, ReturnNode) || isa(stmt, GotoNode) || isa(stmt, GotoIfNot)
+        return false
+    end
+    return statement_effect_free(stmt, src, mod)
+end
+
+# Type recorded in `src` for the SSA value `s`.
+abstract_eval_ssavalue(s::SSAValue, src::IRCode) = src.types[s.id]
diff --git a/base/compiler/ssair/show.jl b/base/compiler/ssair/show.jl
new file mode 100644
index 0000000000000..8c9b28cf25052
--- /dev/null
+++ b/base/compiler/ssair/show.jl
@@ -0,0 +1,138 @@
+# Print one line per basic block of `cfg` to `io`, in the form
+# `index => successors`.
+function Base.show(io::IO, cfg::CFG)
+    foreach(pairs(cfg.blocks)) do (idx, block)
+        # Write to the stream we were given rather than to stdout
+        println(io, "$idx\t=>\t", join(block.succs, ", "))
+    end
+end
+
+# Print `val` to `io`, rendering SSA values as `%id`.
+function print_ssa(io::IO, val)
+    if isa(val, SSAValue)
+        print(io, "%$(val.id)")
+    else
+        print(io, val)
+    end
+end
+# Print a single IR statement to `io` (without a trailing newline).
+# `idx` is the statement's SSA number, `used` the collection of SSA numbers
+# that are referenced somewhere and `maxsize` the width reserved for the
+# number in the `%idx =` column. `color` selects whether the type of a pi node
+# is printed in red; `print_typ` controls the `::typ` suffix of `:call`
+# expressions.
+function print_node(io::IO, idx, stmt, used, maxsize; color = true, print_typ=true)
+    if idx in used
+        # Only statements whose value is used get a `%idx =` prefix
+        pad = " "^(maxsize-length(string(idx)))
+        print(io, "%$idx $pad= ")
+    else
+        # Keep unused statements aligned with the used ones
+        print(io, " "^(maxsize+4))
+    end
+    if isa(stmt, PhiNode)
+        # One `edge => value` entry per incoming edge
+        args = map(1:length(stmt.edges)) do i
+            e = stmt.edges[i]
+            v = !isassigned(stmt.values, i) ? "#undef" :
+                sprint() do io′
+                    print_ssa(io′, stmt.values[i])
+                end
+            "$e => $v"
+        end
+        print(io, "φ ", '(', join(args, ", "), ')')
+    elseif isa(stmt, PiNode)
+        print(io, "π (")
+        print_ssa(io, stmt.val)
+        print(io, ", ")
+        if color
+            printstyled(io, stmt.typ, color=:red)
+        else
+            print(io, stmt.typ)
+        end
+        print(io, ")")
+    elseif isa(stmt, ReturnNode)
+        # A return node without a value marks unreachable code
+        if !isdefined(stmt, :val)
+            print(io, "unreachable")
+        else
+            print(io, "return ")
+            print_ssa(io, stmt.val)
+        end
+    elseif isa(stmt, GotoIfNot)
+        print(io, "goto ", stmt.dest, " if not ")
+        print_ssa(io, stmt.cond)
+    elseif isexpr(stmt, :call)
+        # callee(arg1, arg2, ...)
+        print_ssa(io, stmt.args[1])
+        print(io, "(")
+        print(io, join(map(arg->sprint(io->print_ssa(io, arg)), stmt.args[2:end]), ", "))
+        print(io, ")")
+        if print_typ && stmt.typ !== Any
+            print(io, "::$(stmt.typ)")
+        end
+    elseif isexpr(stmt, :new)
+        print(io, "new(")
+        print(io, join(map(arg->sprint(io->print_ssa(io, arg)), stmt.args), ", "))
+        print(io, ")")
+    else
+        print(io, stmt)
+    end
+end
+
+# Pretty-print `code` to `io`: one statement per line, with a gutter on the
+# left that shows the basic block structure. Pending entries of
+# `code.new_nodes` are printed in yellow directly before the statement they
+# are to be inserted at. The type of a used, non-pi statement is appended in
+# red.
+function Base.show(io::IO, code::IRCode)
+    io = IOContext(io, :color=>true)
+    # `scan_ssa_use!` only accepts an `IdSet`, so collect the used SSA numbers
+    # in one (as `replace_code!` does)
+    used = IdSet{Int}()
+    println(io, "Code")
+    foreach(stmt->scan_ssa_use!(used, stmt), code.stmts)
+    foreach(((_a, _b, node, _d),) -> scan_ssa_use!(used, node), code.new_nodes)
+    # Width of the widest SSA number that will be printed
+    if isempty(used)
+        maxsize = 0
+    else
+        maxused = maximum(used)
+        maxsize = length(string(maxused))
+    end
+    cfg = code.cfg
+    max_bb_idx_size = length(string(length(cfg.blocks)))
+    bb_idx = 1
+    # Visit the pending new nodes in order of their insertion position
+    perm = sortperm(code.new_nodes, by = x->x[1])
+    new_nodes_perm = Iterators.Stateful(perm)
+    for (idx, stmt) in Iterators.enumerate(code.stmts)
+        bbrange = cfg.blocks[bb_idx].stmts
+        bbrange = bbrange.first:bbrange.last
+        bb_pad = max_bb_idx_size - length(string(bb_idx))
+        # Gutter for every statement but the last of its block; for the last
+        # one it is printed below, once the pending new nodes are out
+        if idx != last(bbrange)
+            if idx == first(bbrange)
+                print(io, "$(bb_idx) ","─"^(1+bb_pad)," ")
+            else
+                print(io, "│  "," "^max_bb_idx_size)
+            end
+        end
+        print_sep = false
+        if idx == last(bbrange)
+            print_sep = true
+        end
+        # `floop` stays true until the first new node at `idx` has been printed
+        floop = true
+        while !isempty(new_nodes_perm) && code.new_nodes[Base.peek(new_nodes_perm)][1] == idx
+            node_idx = popfirst!(new_nodes_perm)
+            _, typ, node, line = code.new_nodes[node_idx]
+            # New nodes are numbered after all existing statements
+            node_idx += length(code.stmts)
+            if print_sep
+                if floop
+                    print(io, "$(bb_idx) ","─"^(1+bb_pad)," ")
+                else
+                    print(io, "│  "," "^max_bb_idx_size)
+                end
+            end
+            print_sep = true
+            floop = false
+            print_ssa_typ = !isa(node, PiNode) && node_idx in used
+            Base.with_output_color(:yellow, io) do io′
+                print_node(io′, node_idx, node, used, maxsize; color = false,
+                    print_typ=!print_ssa_typ || (isa(node, Expr) && typ != node.typ))
+            end
+            if print_ssa_typ
+                printstyled(io, "::$(typ)", color=:red)
+            end
+            println(io)
+        end
+        if print_sep
+            if idx == first(bbrange) && floop
+                print(io, "$(bb_idx) ","─"^(1+bb_pad)," ")
+            else
+                print(io, idx == last(bbrange) ? string("└", "─"^(1+max_bb_idx_size), " ") :
+                    string("│  ", " "^max_bb_idx_size))
+            end
+        end
+        if idx == last(bbrange)
+            bb_idx += 1
+        end
+        typ = code.types[idx]
+        print_ssa_typ = !isa(stmt, PiNode) && idx in used
+        print_node(io, idx, stmt, used, maxsize,
+            print_typ=!print_ssa_typ || (isa(stmt, Expr) && typ != stmt.typ))
+        if print_ssa_typ
+            printstyled(io, "::$(typ)", color=:red)
+        end
+        println(io)
+    end
+end
diff --git a/base/compiler/ssair/slot2ssa.jl b/base/compiler/ssair/slot2ssa.jl
new file mode 100644
index 0000000000000..eaf44dbcc3aa3
--- /dev/null
+++ b/base/compiler/ssair/slot2ssa.jl
@@ -0,0 +1,444 @@
+mutable struct SlotInfo
+    defs::Vector{Int}  # positions that define the slot (statement indices; block numbers after `lift_defuse`)
+    uses::Vector{Int}  # positions that read the slot, in the same numbering as `defs`
+    any_newvar::Bool   # set once a NewvarNode for the slot has been scanned
+end
+SlotInfo() = SlotInfo(Int[], Int[], false)  # empty record: no defs, no uses, no NewvarNode seen
+
+function scan_entry!(result::Vector{SlotInfo}, idx::Int, @nospecialize(stmt))
+    # A NewvarNode is recorded as a def of its slot so that, for the purpose
+    # of liveness analysis, it kills any use chain reaching it.
+    if isa(stmt, NewvarNode)
+        info = result[slot_id(stmt.slot)]
+        info.any_newvar = true
+        push!(info.defs, idx)
+        return
+    end
+    if isexpr(stmt, :(=))
+        lhs = stmt.args[1]
+        isa(lhs, SlotNumber) && push!(result[slot_id(lhs)].defs, idx)
+        stmt = stmt.args[2]  # uses are only looked for in the right-hand side
+    end
+    if isa(stmt, Union{SlotNumber, TypedSlot})
+        push!(result[slot_id(stmt)].uses, idx)
+        return
+    end
+    for op in userefs(stmt)
+        val = op[]
+        isa(val, Union{SlotNumber, TypedSlot}) || continue
+        push!(result[slot_id(val)].uses, idx)
+    end
+end
+
+
+function lift_defuse(cfg::CFG, defuse)
+    # Translate each recorded def/use position into the block `block_for_inst` reports for it
+    to_blocks(positions) = Int[block_for_inst(cfg, pos) for pos in positions]
+    map(defuse) do info
+        defblocks = to_blocks(info.defs)
+        useblocks = to_blocks(info.uses)
+        SlotInfo(defblocks, useblocks, info.any_newvar)
+    end
+end
+
+@inline slot_id(s) = isa(s, SlotNumber) ? (s::SlotNumber).id : (s::TypedSlot).id  # `.id` of either slot kind; the type assertion rejects anything else
+function scan_slot_def_use(nargs, ci::CodeInfo)
+    infos = SlotInfo[SlotInfo() for _ = 1:length(ci.slotnames)]
+    # The first `1 + nargs` slots get a def at position 0, i.e. before any statement
+    for argslot = 1:(1+nargs)
+        push!(infos[argslot].defs, 0)
+    end
+    # Then every statement contributes the defs and uses it contains
+    for (pos, stmt) in Iterators.enumerate(ci.code)
+        scan_entry!(infos, pos, stmt)
+    end
+    return infos
+end
+
+function renumber_ssa(stmt::SSAValue, ssanums::Vector{Any}, new_ssa::Bool=false, used_ssa::Union{Nothing, Vector{Int}}=nothing)
+    # The id indexes `ssanums` directly when `new_ssa` is set, shifted by one otherwise
+    pos = new_ssa ? stmt.id : stmt.id + 1
+    # Ids past the end of the table have no mapping: return the value untouched
+    pos > length(ssanums) && return stmt
+    replacement = ssanums[pos]
+    if used_ssa !== nothing && isa(replacement, SSAValue)
+        used_ssa[replacement.id] += 1  # one more use of the replacement value
+    end
+    return replacement
+end
+
+function renumber_ssa!(@nospecialize(stmt), ssanums::Vector{Any}, new_ssa::Bool=false, used_ssa::Union{Nothing, Vector{Int}}=nothing)
+    renumber(val) = renumber_ssa(val, ssanums, new_ssa, used_ssa)  # mapping applied to each SSA value
+    return isa(stmt, SSAValue) ? renumber(stmt) : ssamap(renumber, stmt)
+end
+
+function make_ssa!(ci::CodeInfo, idx, slot, @nospecialize(typ))
+    idx == 0 && return Argument(slot)  # position 0 is answered with an Argument, nothing is rewritten
+    assignment = ci.code[idx]
+    @assert isexpr(assignment, :(=))
+    ssa = length(ci.ssavaluetypes)      # id of the entry about to be pushed (count before the push)
+    push!(ci.ssavaluetypes, typ)
+    assignment.args[1] = SSAValue(ssa)  # the assignment now targets the fresh SSA value
+    return ssa
+end
+
+struct UndefToken  # field-less marker type
+end
+const undef_token = UndefToken()  # the instance the renaming code compares against (`=== undef_token`)
+
+function new_to_regular(@nospecialize(stmt))
+    # A bare NewSSAValue becomes the SSAValue with the same id
+    isa(stmt, NewSSAValue) && return SSAValue(stmt.id)
+    refs = userefs(stmt)
+    refs === () && return stmt  # `userefs` gave the empty tuple: no operands to visit
+    # Otherwise replace every NewSSAValue operand through its use-ref
+    for ref in refs
+        operand = ref[]
+        isa(operand, NewSSAValue) || continue
+        ref[] = SSAValue(operand.id)
+    end
+    # The statement is read back from the use-ref collection
+    refs[]
+end
+
+function fixup_slot!(ir::IRCode, ci::CodeInfo, idx::Int, slot::Int, @nospecialize(stmt::Union{SlotNumber, TypedSlot}), @nospecialize(ssa))
+    if ssa === undef_token
+        # No value is available for the slot here: insert a `throw_undef_if_not` node with a
+        # `false` condition. We lack the information to remove these now; that happens later.
+        insert_node!(ir, idx, Any, Expr(:throw_undef_if_not, ci.slotnames[slot], false))
+        return undef_token
+    end
+    # Slots flagged SLOT_USEDUNDEF get an `undefcheck` node, unless the
+    # replacement is an Argument or `nothing`.
+    checked = !(isa(ssa, Argument) || ssa === nothing) && (ci.slotflags[slot] & SLOT_USEDUNDEF) != 0
+    checked && insert_node!(ir, idx, Any, Expr(:undefcheck, ci.slotnames[slot], ssa))
+    isa(stmt, SlotNumber) && return ssa
+    # TypedSlot: wrap the value in a PiNode carrying the slot's type
+    pinode = insert_node!(ir, idx, stmt.typ, PiNode(ssa, stmt.typ))
+    return NewSSAValue(pinode.id)
+end
+
+function fixemup!(cond, rename, ir::IRCode, ci::CodeInfo, idx::Int, @nospecialize(stmt))  # cond(slot): rewrite it?  rename(slot): replacement value
+    if isa(stmt, Union{SlotNumber, TypedSlot}) && cond(stmt)
+        return fixup_slot!(ir, ci, idx, slot_id(stmt), stmt, rename(stmt))  # bare slot; the result may be `undef_token`
+    end
+    if isexpr(stmt, :(=))
+        stmt.args[2] = fixemup!(cond, rename, ir, ci, idx, stmt.args[2])  # only the right-hand side is rewritten
+        return stmt
+    end
+    if isa(stmt, PhiNode)
+        for i = 1:length(stmt.edges)
+            isassigned(stmt.values, i) || continue  # unassigned entries are left alone
+            val = stmt.values[i]
+            isa(val, Union{SlotNumber, TypedSlot}) || continue
+            cond(val) || continue
+            bb_idx = block_for_inst(ir.cfg, stmt.edges[i])
+            from_bb_terminator = last(ir.cfg.blocks[bb_idx].stmts)
+            stmt.values[i] = fixup_slot!(ir, ci, from_bb_terminator, slot_id(val), val, rename(val))  # nodes are inserted at the last statement of the edge's block
+        end
+        return stmt
+    end
+    if isexpr(stmt, :isdefined)  # note: `cond` is not consulted in this branch
+        val = stmt.args[1]
+        if isa(val, Union{SlotNumber, TypedSlot})
+            slot = slot_id(val)
+            if (ci.slotflags[slot] & SLOT_USEDUNDEF) == 0
+                return true  # slot is not flagged SLOT_USEDUNDEF: folded to `true`
+            else
+                ssa = rename(val)
+                if ssa === undef_token
+                    return false
+                elseif !isa(ssa, SSAValue) && !isa(ssa, NewSSAValue)
+                    return true  # replacement is not an SSA value: folded to `true`
+                end
+            end
+            stmt.args[1] = ssa  # only reached when `ssa` is an SSAValue or NewSSAValue
+        end
+        return stmt
+    end
+    urs = userefs(stmt)
+    urs === () && return stmt  # `userefs` gave the empty tuple: nothing to rewrite
+    for op in urs
+        val = op[]
+        if isa(val, Union{SlotNumber, TypedSlot}) && cond(val)
+            x = fixup_slot!(ir, ci, idx, slot_id(val), val, rename(val))
+            # `fixup_slot!` inserted an undef error node. Drop this statement
+            # (return `nothing`) to avoid confusing the optimizer
+            if x === undef_token
+                return nothing
+            end
+            op[] = x
+        end
+    end
+    urs[]  # the statement as held by the use-ref collection after the replacements
+end
+
+function fixup_uses!(ir::IRCode, ci::CodeInfo, uses::Vector{Int}, slot, @nospecialize(ssa))
+    for pos in uses  # each entry indexes a statement of `ci.code`
+        ci.code[pos] = fixemup!(s->slot_id(s)==slot, s->ssa, ir, ci, pos, ci.code[pos])  # only references to `slot`, always replaced by `ssa`
+    end
+end
+
+function rename_uses!(ir::IRCode, ci::CodeInfo, idx::Int, @nospecialize(stmt), renames::Vector{Any})
+    return fixemup!(s->true, s->renames[slot_id(s)], ir, ci, idx, stmt)  # every slot qualifies; its replacement is looked up in `renames`
+end
+
+function strip_trailing_junk!(code::Vector{Any}, lines::Vector{LineNumberNode})
+    # Remove `nothing`s at the end (with their line entries), we don't handle
+    # them well (we expect the last instruction to be a terminator)
+    for i = length(code):-1:1
+        if code[i] !== nothing
+            resize!(code, i)
+            resize!(lines, i)
+            break
+        end
+    end
+    # If the last instruction is not a terminator, add one (implicit return on dead
+    # branches). Empty `code` gets one too, rather than failing on `code[end]`.
+    term = isempty(code) ? nothing : code[end]
+    if !isa(term, GotoIfNot) && !isa(term, GotoNode) && !isa(term, ReturnNode)
+        push!(code, ReturnNode{Any}())
+        push!(lines, LineNumberNode(0))
+    end
+    return code
+end
+
+struct DelayedTyp  # placeholder returned by `typ_for_val` for a NewSSAValue
+    phi::NewSSAValue  # the value whose type is later read from `ir.new_nodes`
+end
+
+# Type to record for the value `val` (maybe use expr_type?)
+function typ_for_val(@nospecialize(val), ci::CodeInfo)
+    isa(val, Expr) && return val.typ  # the type stored on the expression
+    isa(val, GlobalRef) && return abstract_eval_global(val.mod, val.name)
+    isa(val, SSAValue) && return ci.ssavaluetypes[val.id+1]  # the id is offset by one into the type table
+    isa(val, Argument) && return ci.slottypes[val.n]
+    isa(val, NewSSAValue) && return DelayedTyp(val)  # not resolved here; callers handle DelayedTyp
+    isa(val, QuoteNode) && return Const(val.value)
+    isa(val, Union{Symbol, PiNode, PhiNode, SlotNumber, TypedSlot}) && error("unexpected val type")
+    return Const(val)  # any other value is wrapped as a constant
+end
+
+# Run iterated dominance frontier for `slot`; returns the blocks collected in `phiblocks`
+function idf(cfg::CFG, defuse, domtree::DomTree, slot::Int)
+    # This should be a priority queue, but TODO - sorted array for now
+    defs = defuse[slot].defs  # used to index `domtree.nodes`, so these must be block numbers
+    pq = Tuple{Int, Int}[(defs[i], domtree.nodes[defs[i]].level) for i in 1:length(defs)]
+    sort!(pq, by=x->x[2])  # ascending by level, so `pop!` yields the entry with the largest level
+    phiblocks = Int[]
+    processed = IdSet{Int}()  # blocks already added to `phiblocks`
+    while !isempty(pq)
+        node, level = pop!(pq)
+        worklist = Int[]
+        visited = IdSet{Int}()  # reset for every root taken from `pq`
+        push!(worklist, node)
+        while !isempty(worklist)
+            active = pop!(worklist)
+            for succ in cfg.blocks[active].succs
+                succ_level = domtree.nodes[succ].level
+                succ_level > level && continue  # skip successors at a larger level than the root
+                succ in processed && continue
+                push!(processed, succ)
+                # <- TODO: Use liveness here
+                push!(phiblocks, succ)
+                if !(succ in defuse[slot].defs)  # a phi block that was not already a def is queued as a new root
+                    push!(pq, (succ, succ_level))
+                    sort!(pq, by=x->x[2])  # keep the array sorted after every insertion
+                end
+            end
+
+            for child in domtree.nodes[active].children  # continue through the dominator-tree children
+                child in visited && continue
+                push!(visited, child)
+                push!(worklist, child)
+            end
+        end
+    end
+    phiblocks
+end
+
+function construct_ssa!(ci::CodeInfo, ir::IRCode, domtree::DomTree, defuse, nargs::Int)  # note: `nargs` is not read in this body
+    cfg = ir.cfg
+    left = Int[]  # slots that went through phi placement; only written to in this function
+    defuse_blocks = lift_defuse(ir.cfg, defuse)
+    phi_slots = Vector{Int}[Vector{Int}() for _ = 1:length(ir.cfg.blocks)]  # per block: slots that got a phi there
+    phi_nodes = Vector{Pair{Int,PhiNode}}[Vector{Pair{Int,PhiNode}}() for _ = 1:length(cfg.blocks)]  # per block: id=>node, parallel to phi_slots
+    phi_ssas = SSAValue[]  # not used below
+    for (idx, slot) in Iterators.enumerate(defuse)
+        # No uses => no need for phi nodes
+        isempty(slot.uses) && continue
+        # TODO: Restore this optimization
+        if false # length(slot.defs) == 1 && slot.any_newvar
+            if slot.defs[] == 0
+                typ = ci.slottypes[idx]
+                ssaval = Argument(idx)
+                fixup_uses!(ir, ci, slot.uses, idx, ssaval)
+            elseif isa(ci.code[slot.defs[]], NewvarNode)
+                typ = MaybeUndef(Union{})
+                ssaval = nothing
+                for use in slot.uses  # every use, not just a single one (`slot.uses[]` would throw otherwise)
+                    insert_node!(ir, use, Union{}, Expr(:throw_undef_if_not, ci.slotnames[idx], false))
+                end
+                fixup_uses!(ir, ci, slot.uses, idx, nothing)
+            else
+                val = ci.code[slot.defs[]].args[2]
+                typ = typ_for_val(val, ci)
+                ssaval = SSAValue(make_ssa!(ci, slot.defs[], idx, typ))
+                fixup_uses!(ir, ci, slot.uses, idx, ssaval)
+            end
+            continue
+        end
+        # TODO: Perform liveness here to eliminate dead phi nodes
+        phiblocks = idf(cfg, defuse_blocks, domtree, idx)
+        for block in phiblocks
+            push!(phi_slots[block], idx)
+            node = PhiNode()
+            ssa = insert_node!(ir, first_insert_for_bb(ci.code, cfg, block), Union{}, node)  # phi starts out with type Union{}
+            push!(phi_nodes[block], ssa.id=>node)
+        end
+        push!(left, idx)
+    end
+    # Perform SSA renaming
+    initial_incoming_vals = Any[
+        if 0 in defuse[x].defs
+            Argument(x)
+        elseif !defuse[x].any_newvar
+            undef_token
+        else
+            SSAValue(-1)
+        end for x in 1:length(ci.slotnames)
+    ]
+    worklist = Any[(1, 0, initial_incoming_vals)]  # (block, predecessor, slot values on entry); starts at block 1 with predecessor 0
+    visited = IdSet{Int}()
+    type_refine_phi = IdSet{Int}()
+    while !isempty(worklist)
+        (item, pred, incoming_vals) = pop!(worklist)
+        # Insert phi nodes if necessary
+        for (idx, slot) in Iterators.enumerate(phi_slots[item])
+            ssaval, node = phi_nodes[item][idx]
+            incoming_val = incoming_vals[slot]
+            if incoming_val == SSAValue(-1)
+                # Optimistically omit this path.
+                # Liveness analysis would probably have prevented us from inserting this phi node
+                continue
+            end
+            push!(node.edges, pred)
+            if incoming_val == undef_token
+                resize!(node.values, length(node.values)+1)  # grow by one slot that stays unassigned
+            else
+                push!(node.values, incoming_val)
+            end
+            # TODO: Remove the next line, it shouldn't be necessary
+            push!(type_refine_phi, ssaval)
+            if isa(incoming_val, NewSSAValue)
+                push!(type_refine_phi, ssaval)  # already covered by the unconditional push above
+            end
+            typ = incoming_val == undef_token ? MaybeUndef(Union{}) : typ_for_val(incoming_val, ci)
+            new_node_id = ssaval - length(ir.stmts)  # index into `ir.new_nodes`
+            old_insert, old_typ, _, old_line = ir.new_nodes[new_node_id]
+            if isa(typ, DelayedTyp)
+                push!(type_refine_phi, ssaval)  # already covered by the unconditional push above
+            end
+            new_typ = isa(typ, DelayedTyp) ? Union{} : tmerge(old_typ, typ)
+            ir.new_nodes[new_node_id] = (old_insert, new_typ, node, old_line)
+            incoming_vals[slot] = NewSSAValue(ssaval)  # from here on the slot's value is the phi
+        end
+        (item in visited) && continue  # statements of a block are renamed once; phi operands are added on every visit
+        push!(visited, item)
+        for idx in cfg.blocks[item].stmts
+            stmt = ci.code[idx]
+            if isa(stmt, NewvarNode)
+                incoming_vals[slot_id(stmt.slot)] = undef_token
+                ci.code[idx] = nothing
+            else
+                stmt = rename_uses!(ir, ci, idx, stmt, incoming_vals)
+                if stmt === nothing && idx == last(cfg.blocks[item].stmts)
+                    # preserve the CFG
+                    stmt = ReturnNode{Any}()
+                end
+                ci.code[idx] = stmt
+                # Record a store
+                if isexpr(stmt, :(=)) && isa(stmt.args[1], SlotNumber)
+                    id = slot_id(stmt.args[1])
+                    val = stmt.args[2]
+                    typ = typ_for_val(val, ci)
+                    incoming_vals[id] = SSAValue(make_ssa!(ci, idx, id, typ))
+                end
+            end
+        end
+        for succ in cfg.blocks[item].succs
+            push!(worklist, (succ, item, copy(incoming_vals)))  # each successor gets its own copy of the slot values
+        end
+    end
+    # Delete any instruction in unreachable blocks
+    for bb in setdiff(IdSet{Int}(1:length(cfg.blocks)), visited)
+        for idx in cfg.blocks[bb].stmts
+            ci.code[idx] = nothing
+        end
+    end
+    # Convert into IRCode form
+    code = ir.stmts
+    ssavalmap = Any[SSAValue(-1) for _ in 1:(length(ci.ssavaluetypes)+1)]  # old SSA id + 1 => statement position
+    types = Any[Any for _ in 1:length(code)]
+    # Detect statement positions for assignments and construct array
+    for (idx, stmt) in Iterators.enumerate(ci.code)
+        if isexpr(stmt, :(=)) && isa(stmt.args[1], SSAValue)
+            ssavalmap[stmt.args[1].id + 1] = SSAValue(idx)
+            types[idx] = ci.ssavaluetypes[stmt.args[1].id + 1]
+            stmt = stmt.args[2]
+            if isa(stmt, PhiNode)
+                edges = Any[block_for_inst(cfg, edge) for edge in stmt.edges]
+                code[idx] = PhiNode(edges, stmt.values)
+            else
+                code[idx] = stmt
+            end
+        # Convert GotoNode/GotoIfNot/PhiNode to BB addressing
+        elseif isa(stmt, GotoNode)
+            code[idx] = GotoNode(block_for_inst(cfg, stmt.label))
+        elseif isa(stmt, GotoIfNot)
+            code[idx] = GotoIfNot(stmt.cond, block_for_inst(cfg, stmt.dest))
+        else
+            code[idx] = stmt
+        end
+    end
+    # This is a bit awkward, because it basically duplicates what type
+    # inference does. Ideally, we'd just use this representation earlier
+    # to make sure phi nodes have accurate types
+    changed = true
+    while changed  # repeat until a full pass changes no phi type
+        changed = false
+        for phi in type_refine_phi
+            new_idx = phi - length(ir.stmts)
+            old_insert, old_typ, node, old_line = ir.new_nodes[new_idx]
+            new_typ = Union{}
+            for i = 1:length(node.values)
+                if !isassigned(node.values, i)
+                    if !isa(new_typ, MaybeUndef)
+                        new_typ = MaybeUndef(new_typ)  # an unassigned operand makes the phi possibly undefined
+                    end
+                    continue
+                end
+                typ = typ_for_val(node.values[i], ci)
+                if isa(typ, DelayedTyp)
+                    typ = ir.new_nodes[typ.phi.id - length(ir.stmts)][2]  # resolve against the current type of that new node
+                end
+                new_typ = tmerge(new_typ, typ)
+            end
+            if !(old_typ ⊑ new_typ) || !(new_typ ⊑ old_typ)
+                ir.new_nodes[new_idx] = (old_insert, new_typ, node, old_line)
+                changed = true
+            end
+        end
+    end
+    types = Any[isa(types[i], DelayedTyp) ? ir.new_nodes[types[i].phi.id - length(ir.stmts)][2] : types[i] for i in 1:length(types)]
+    new_nodes = NewNode[let (pos, typ, node, line) = ir.new_nodes[i]
+            typ = isa(typ, DelayedTyp) ? ir.new_nodes[typ.phi.id - length(ir.stmts)][2] : typ
+            (pos, typ, node, line)
+        end for i in 1:length(ir.new_nodes)]
+    # Renumber SSA values
+    code = Any[new_to_regular(renumber_ssa!(code[i], ssavalmap)) for i in 1:length(code)]
+    new_nodes = NewNode[let (pt, typ, stmt, line) = new_nodes[i]
+            (pt, typ, new_to_regular(renumber_ssa!(stmt, ssavalmap)), line)
+        end for i in 1:length(new_nodes)]
+    return IRCode(ir, code, types, ir.lines, ir.cfg, new_nodes)
+end
diff --git a/base/compiler/ssair/verify.jl b/base/compiler/ssair/verify.jl
new file mode 100644
index 0000000000000..9886bc9b4bbd4
--- /dev/null
+++ b/base/compiler/ssair/verify.jl
@@ -0,0 +1,52 @@
+function check_op(ir::IRCode, domtree::DomTree, @nospecialize(op), use_bb::Int, use_idx::Int)
+    if isa(op, SSAValue)
+        def_bb = block_for_inst(ir.cfg, op.id)
+        if (def_bb == use_bb)
+            @assert op.id < use_idx  # same block: the definition must come before the use
+        else
+            if !dominates(domtree, def_bb, use_bb)
+                # Different blocks: the defining block has to dominate the using block
+                error("SSA value is used in a basic block that its defining block does not dominate")
+            end
+        end
+    elseif isa(op, Union{SlotNumber, TypedSlot})
+        # Slots must not appear as operands in the IR being checked
+        error("Left over slot detected in converted IR")
+    end
+end
+
+function verify_ir(ir::IRCode)
+    # For now require compact IR
+    @assert isempty(ir.new_nodes)
+    domtree = construct_domtree(ir.cfg)
+    for (bb, idx, stmt) in bbidxstmt(ir)
+        if isa(stmt, PhiNode)
+            @assert length(stmt.edges) == length(stmt.values)
+            for i = 1:length(stmt.edges)
+                edge = stmt.edges[i]
+                if !(edge in ir.cfg.blocks[bb].preds)
+                    error("PhiNode edge is not a predecessor of the PhiNode's basic block")
+                end
+                isassigned(stmt.values, i) || continue  # unassigned operands are not checked further
+                val = stmt.values[i]
+                phiT = ir.types[idx]
+                if isa(val, SSAValue)
+                    if !(ir.types[val.id] ⊑ phiT)
+                        # The operand's type is not a sub lattice element of the
+                        # PhiNode's type. This check is currently disabled: no
+                        # error is raised. The message used to be built from
+                        # `idx`, `val.id`, `phiT` and `ir.types[val.id]`.
+                        #
+                        #error()
+                    end
+                end
+                check_op(ir, domtree, val, edge, last(ir.cfg.blocks[stmt.edges[i]].stmts)+1)  # the use is placed just past the end of the incoming block
+            end
+        else
+            for op in userefs(stmt)
+                op = op[]
+                check_op(ir, domtree, op, bb, idx)
+            end
+        end
+    end
+end
diff --git a/base/compiler/typelattice.jl b/base/compiler/typelattice.jl
index ff8d272946a55..3a2fb6a19e634 100644
--- a/base/compiler/typelattice.jl
+++ b/base/compiler/typelattice.jl
@@ -48,6 +48,11 @@ struct PartialTypeVar
     PartialTypeVar(tv::TypeVar, lb_certain::Bool, ub_certain::Bool) = new(tv, lb_certain, ub_certain)
 end
 
+# Wraps a type and represents that the value may also be undefined at this point.
+struct MaybeUndef
+    typ  # the wrapped type, i.e. the type of the value when it is defined
+end
+
 # The type of a variable load is either a value or an UndefVarError
 struct VarState
     typ
@@ -120,6 +125,11 @@ end
 maybe_extract_const_bool(c) = nothing
 
 function ⊑(@nospecialize(a), @nospecialize(b))
+    if isa(a, MaybeUndef) && !isa(b, MaybeUndef)
+        return false
+    end
+    isa(a, MaybeUndef) && (a = a.typ)
+    isa(b, MaybeUndef) && (b = b.typ)
     (a === NOT_FOUND || b === Any) && return true
     (a === Any || b === NOT_FOUND) && return false
     a === Union{} && return true
@@ -160,6 +170,7 @@ function widenconst(c::Const)
         return typeof(c.val)
     end
 end
+widenconst(m::MaybeUndef) = widenconst(m.typ)  # widen the wrapped type; the MaybeUndef wrapper itself is dropped
 widenconst(c::PartialTypeVar) = TypeVar
 widenconst(@nospecialize(t)) = t
 
@@ -168,6 +179,11 @@ issubstate(a::VarState, b::VarState) = (a.typ ⊑ b.typ && a.undef <= b.undef)
 function tmerge(@nospecialize(typea), @nospecialize(typeb))
     typea ⊑ typeb && return typeb
     typeb ⊑ typea && return typea
+    if isa(typea, MaybeUndef) || isa(typeb, MaybeUndef)
+        return MaybeUndef(tmerge(
+            isa(typea, MaybeUndef) ? typea.typ : typea,
+            isa(typeb, MaybeUndef) ? typeb.typ : typeb))
+    end
     if isa(typea, Conditional) && isa(typeb, Conditional)
         if typea.var === typeb.var
             vtype = tmerge(typea.vtype, typeb.vtype)
diff --git a/base/compiler/utilities.jl b/base/compiler/utilities.jl
index b11651a5d6845..34234d2b10048 100644
--- a/base/compiler/utilities.jl
+++ b/base/compiler/utilities.jl
@@ -172,7 +172,7 @@ function method_for_inference_heuristics(method::Method, @nospecialize(sig), spa
     return method
 end
 
-function exprtype(@nospecialize(x), src::CodeInfo, mod::Module)
+function exprtype(@nospecialize(x), src, mod::Module)
     if isa(x, Expr)
         return (x::Expr).typ
     elseif isa(x, SlotNumber)
@@ -187,6 +187,10 @@ function exprtype(@nospecialize(x), src::CodeInfo, mod::Module)
         return AbstractEvalConstant((x::QuoteNode).value)
     elseif isa(x, GlobalRef)
         return abstract_eval_global(x.mod, (x::GlobalRef).name)
+    elseif isa(x, PhiNode)
+        return Any
+    elseif isa(x, PiNode)
+        return x.typ
     else
         return AbstractEvalConstant(x)
     end
@@ -242,18 +246,31 @@ end
 ##############
 
 # scan body for the value of the largest referenced label
-function label_counter(body::Vector{Any})
+# so that we won't accidentally re-use it
+function label_counter(body::Vector{Any}, comefrom=true)
     l = 0
     for b in body
         label = 0
-        if isa(b, GotoNode)
+        if isa(b, LabelNode) && comefrom
             label = b.label::Int
-        elseif isa(b, LabelNode)
-            label = b.label
-        elseif isa(b, Expr) && b.head == :gotoifnot
-            label = b.args[2]::Int
-        elseif isa(b, Expr) && b.head == :enter
-            label = b.args[1]::Int
+        elseif isa(b, GotoNode)
+            label = b.label::Int
+        elseif isa(b, Expr)
+            if b.head == :gotoifnot
+                label = b.args[2]::Int
+            elseif b.head == :enter
+                label = b.args[1]::Int
+            elseif b.head === :(=) && comefrom
+                rhs = b.args[2]
+                if isa(rhs, PhiNode)
+                    for edge in rhs.edges
+                        edge = edge::Int + 1
+                        if edge > l
+                            l = edge
+                        end
+                    end
+                end
+            end
         end
         if label > l
             l = label
@@ -268,6 +285,7 @@ function get_label_map(body::Vector{Any})
     for i = 1:length(body)
         el = body[i]
         if isa(el, LabelNode)
+            # @assert labelmap[el.label] == 0
             labelmap[el.label] = i
         end
     end
diff --git a/base/compiler/validation.jl b/base/compiler/validation.jl
index 2dadd460652a7..6a913e1d158c4 100644
--- a/base/compiler/validation.jl
+++ b/base/compiler/validation.jl
@@ -25,7 +25,8 @@ const VALID_EXPR_HEADS = IdDict{Any,Any}(
     :simdloop => 0:0,
     :gc_preserve_begin => 0:typemax(Int),
     :gc_preserve_end => 0:typemax(Int),
-    :thunk => 1:1
+    :thunk => 1:1,
+    :throw_undef_if_not => 2:2
 )
 
 # @enum isn't defined yet, otherwise I'd use it for this
@@ -139,7 +140,7 @@ function validate_code!(errors::Vector{>:InvalidCodeError}, c::CodeInfo, is_top_
             elseif head === :call || head === :invoke || head == :gc_preserve_end || head === :meta ||
                 head === :inbounds || head === :foreigncall || head === :const || head === :enter ||
                 head === :leave || head === :method || head === :global || head === :static_parameter ||
-                head === :new || head === :thunk || head === :simdloop
+                head === :new || head === :thunk || head === :simdloop || head === :throw_undef_if_not
                 validate_val!(x)
             else
                 push!(errors, InvalidCodeError("invalid statement", x))
diff --git a/base/iterators.jl b/base/iterators.jl
index 2f69d74346208..817c699f26f9c 100644
--- a/base/iterators.jl
+++ b/base/iterators.jl
@@ -11,7 +11,8 @@ const Base = parentmodule(@__MODULE__)
 using .Base:
     @inline, Pair, AbstractDict, IndexLinear, IndexCartesian, IndexStyle, AbstractVector, Vector,
     tail, tuple_type_head, tuple_type_tail, tuple_type_cons, SizeUnknown, HasLength, HasShape,
-    IsInfinite, EltypeUnknown, HasEltype, OneTo, @propagate_inbounds, Generator, AbstractRange
+    IsInfinite, EltypeUnknown, HasEltype, OneTo, @propagate_inbounds, Generator, AbstractRange,
+    linearindices, (:), |, +, -, !==, !
 
 import .Base:
     start, done, next, first, last,
@@ -1064,19 +1065,23 @@ function reset!(s::Stateful{T,VS}, itr::T) where {T,VS}
     s
 end
 
-# Try to find an appropriate type for the (value, state tuple),
-# by doing a recursive unrolling of the iteration protocol up to
-# fixpoint.
-function fixpoint_iter_type(itrT::Type, valT::Type, stateT::Type)
-    nextvalstate = Base._return_type(next, Tuple{itrT, stateT})
-    nextvalstate <: Tuple{Any, Any} || return Any
-    nextvalstate = Tuple{
-        typejoin(valT, fieldtype(nextvalstate, 1)),
-        typejoin(stateT, fieldtype(nextvalstate, 2))}
-    return (Tuple{valT, stateT} == nextvalstate ? nextvalstate :
-        fixpoint_iter_type(itrT,
-            fieldtype(nextvalstate, 1),
-            fieldtype(nextvalstate, 2)))
+if Base === Core.Compiler  # this file's `Base` is its parent module, which may be Core.Compiler
+    fixpoint_iter_type(a, b, c) = Any  # stub: no type is computed in that case
+else
+    # Try to find an appropriate type for the (value, state) tuple
+    # by doing a recursive unrolling of the iteration protocol up to
+    # a fixpoint.
+    function fixpoint_iter_type(itrT::Type, valT::Type, stateT::Type)
+        nextvalstate = Base._return_type(next, Tuple{itrT, stateT})
+        nextvalstate <: Tuple{Any, Any} || return Any  # give up unless `next` is known to return a 2-tuple
+        nextvalstate = Tuple{
+            typejoin(valT, fieldtype(nextvalstate, 1)),
+            typejoin(stateT, fieldtype(nextvalstate, 2))}
+        return (Tuple{valT, stateT} == nextvalstate ? nextvalstate :  # unchanged by the join: fixpoint reached
+            fixpoint_iter_type(itrT,
+                fieldtype(nextvalstate, 1),
+                fieldtype(nextvalstate, 2)))
+    end
+end
 
 convert(::Type{Stateful}, itr) = Stateful(itr)
diff --git a/base/ordering.jl b/base/ordering.jl
index 8f7f581f577e1..52320ac83ae89 100644
--- a/base/ordering.jl
+++ b/base/ordering.jl
@@ -2,6 +2,13 @@
 
 module Order
 
+
+import ..@__MODULE__, ..parentmodule
+const Base = parentmodule(@__MODULE__)
+import .Base:
+    AbstractVector, @propagate_inbounds, isless, identity, getindex,
+    +, -, !, &, <, |
+
 ## notions of element ordering ##
 
 export # not exported by Base
@@ -44,7 +51,7 @@ lt(o::ReverseOrdering,       a, b) = lt(o.fwd,b,a)
 lt(o::By,                    a, b) = isless(o.by(a),o.by(b))
 lt(o::Lt,                    a, b) = o.lt(a,b)
 
-Base.@propagate_inbounds function lt(p::Perm, a::Integer, b::Integer)
+@propagate_inbounds function lt(p::Perm, a::Integer, b::Integer)
     da = p.data[a]
     db = p.data[b]
     lt(p.order, da, db) | (!lt(p.order, db, da) & (a < b))
diff --git a/base/sort.jl b/base/sort.jl
index 5713bd729b89f..8819f4c97da83 100644
--- a/base/sort.jl
+++ b/base/sort.jl
@@ -2,8 +2,17 @@
 
 module Sort
 
-using .Base.Order, .Base.Checked
-using .Base: copymutable, linearindices, IndexStyle, viewindexing, IndexLinear, _length
+import ..@__MODULE__, ..parentmodule
+const Base = parentmodule(@__MODULE__)
+using .Base.Order
+using .Base: copymutable, linearindices, IndexStyle, viewindexing, IndexLinear, _length, (:),
+    eachindex, axes, first, last, similar, start, next, done, zip, @views, OrdinalRange,
+    AbstractVector, @inbounds, AbstractRange, @eval, @inline, Vector, @noinline,
+    AbstractMatrix, AbstractUnitRange, isless, identity, eltype, >, <, <=, >=, |, +, -, *, !,
+    extrema, sub_with_overflow, add_with_overflow, oneunit, div, getindex, setindex!,
+    length, resize!, fill
+
+using .Base: >>>, !==
 
 import .Base:
     sort,
@@ -819,7 +828,11 @@ function sortperm_int_range(x::Vector{<:Integer}, rangelen, minval)
     @inbounds for i = 1:n
         where[x[i] + offs + 1] += 1
     end
-    cumsum!(where, where)
+
+    #cumsum!(where, where)
+    @inbounds for i = 2:length(where)
+        where[i] += where[i-1]
+    end
 
     P = Vector{Int}(uninitialized, n)
     @inbounds for i = 1:n
@@ -985,6 +998,7 @@ slice_dummy(::AbstractUnitRange{T}) where {T} = oneunit(T)
 module Float
 using ..Sort
 using ...Order
+using ..Base: @inbounds, AbstractVector, Vector, last, axes
 
 import Core.Intrinsics: slt_int
 import ..Sort: sort!
diff --git a/base/stream.jl b/base/stream.jl
index 6a433578ed4fc..775b8852bd1ac 100644
--- a/base/stream.jl
+++ b/base/stream.jl
@@ -898,8 +898,8 @@ function unsafe_write(s::LibuvStream, p::Ptr{UInt8}, n::UInt)
 end
 
 function flush(s::LibuvStream)
-    if s.sendbuf !== nothing
-        buf = s.sendbuf
+    buf = s.sendbuf
+    if buf !== nothing
         if bytesavailable(buf) > 0
             arr = take!(buf)        # Array of UInt8s
             uv_write(s, arr)
@@ -915,8 +915,8 @@ buffer_writes(s::LibuvStream, bufsize) = (s.sendbuf=PipeBuffer(bufsize); s)
 ## low-level calls to libuv ##
 
 function write(s::LibuvStream, b::UInt8)
-    if s.sendbuf !== nothing
-        buf = s.sendbuf
+    buf = s.sendbuf
+    if buf !== nothing
         if bytesavailable(buf) + 1 < buf.maxsize
             return write(buf, b)
         end
diff --git a/base/subarray.jl b/base/subarray.jl
index d975a07a27581..6a6dc96eaed45 100644
--- a/base/subarray.jl
+++ b/base/subarray.jl
@@ -363,216 +363,3 @@ function parentdims(s::SubArray)
     end
     dimindex
 end
-
-"""
-    replace_ref_end!(ex)
-
-Recursively replace occurrences of the symbol :end in a "ref" expression (i.e. A[...]) `ex`
-with the appropriate function calls (`lastindex` or `size`). Replacement uses
-the closest enclosing ref, so
-
-    A[B[end]]
-
-should transform to
-
-    A[B[lastindex(B)]]
-
-"""
-replace_ref_end!(ex) = replace_ref_end_!(ex, nothing)[1]
-# replace_ref_end_!(ex,withex) returns (new ex, whether withex was used)
-function replace_ref_end_!(ex, withex)
-    used_withex = false
-    if isa(ex,Symbol) && ex == :end
-        withex === nothing && error("Invalid use of end")
-        return withex, true
-    elseif isa(ex,Expr)
-        if ex.head == :ref
-            ex.args[1], used_withex = replace_ref_end_!(ex.args[1],withex)
-            S = isa(ex.args[1],Symbol) ? ex.args[1]::Symbol : gensym(:S) # temp var to cache ex.args[1] if needed
-            used_S = false # whether we actually need S
-            # new :ref, so redefine withex
-            nargs = length(ex.args)-1
-            if nargs == 0
-                return ex, used_withex
-            elseif nargs == 1
-                # replace with lastindex(S)
-                ex.args[2], used_S = replace_ref_end_!(ex.args[2],:($lastindex($S)))
-            else
-                n = 1
-                J = lastindex(ex.args)
-                for j = 2:J
-                    exj, used = replace_ref_end_!(ex.args[j],:($lastindex($S,$n)))
-                    used_S |= used
-                    ex.args[j] = exj
-                    if isa(exj,Expr) && exj.head == :...
-                        # splatted object
-                        exjs = exj.args[1]
-                        n = :($n + length($exjs))
-                    elseif isa(n, Expr)
-                        # previous expression splatted
-                        n = :($n + 1)
-                    else
-                        # an integer
-                        n += 1
-                    end
-                end
-            end
-            if used_S && S !== ex.args[1]
-                S0 = ex.args[1]
-                ex.args[1] = S
-                ex = Expr(:let, :($S = $S0), ex)
-            end
-        else
-            # recursive search
-            for i = eachindex(ex.args)
-                ex.args[i], used = replace_ref_end_!(ex.args[i],withex)
-                used_withex |= used
-            end
-        end
-    end
-    ex, used_withex
-end
-
-"""
-    @view A[inds...]
-
-Creates a `SubArray` from an indexing expression. This can only be applied directly to a
-reference expression (e.g. `@view A[1,2:end]`), and should *not* be used as the target of
-an assignment (e.g. `@view(A[1,2:end]) = ...`).  See also [`@views`](@ref)
-to switch an entire block of code to use views for slicing.
-
-```jldoctest
-julia> A = [1 2; 3 4]
-2×2 Array{Int64,2}:
- 1  2
- 3  4
-
-julia> b = @view A[:, 1]
-2-element view(::Array{Int64,2}, :, 1) with eltype Int64:
- 1
- 3
-
-julia> fill!(b, 0)
-2-element view(::Array{Int64,2}, :, 1) with eltype Int64:
- 0
- 0
-
-julia> A
-2×2 Array{Int64,2}:
- 0  2
- 0  4
-```
-"""
-macro view(ex)
-    if Meta.isexpr(ex, :ref)
-        ex = replace_ref_end!(ex)
-        if Meta.isexpr(ex, :ref)
-            ex = Expr(:call, view, ex.args...)
-        else # ex replaced by let ...; foo[...]; end
-            @assert Meta.isexpr(ex, :let) && Meta.isexpr(ex.args[2], :ref)
-            ex.args[2] = Expr(:call, view, ex.args[2].args...)
-        end
-        Expr(:&&, true, esc(ex))
-    else
-        throw(ArgumentError("Invalid use of @view macro: argument must be a reference expression A[...]."))
-    end
-end
-
-############################################################################
-# @views macro code:
-
-# maybeview is like getindex, but returns a view for slicing operations
-# (while remaining equivalent to getindex for scalar indices and non-array types)
-@propagate_inbounds maybeview(A, args...) = getindex(A, args...)
-@propagate_inbounds maybeview(A::AbstractArray, args...) = view(A, args...)
-@propagate_inbounds maybeview(A::AbstractArray, args::Number...) = getindex(A, args...)
-@propagate_inbounds maybeview(A) = getindex(A)
-@propagate_inbounds maybeview(A::AbstractArray) = getindex(A)
-
-# _views implements the transformation for the @views macro.
-# @views calls esc(_views(...)) to work around #20241,
-# so any function calls we insert (to maybeview, or to
-# lastindex in replace_ref_end!) must be interpolated
-# as values rather than as symbols to ensure that they are called
-# from Base rather than from the caller's scope.
-_views(x) = x
-function _views(ex::Expr)
-    if ex.head in (:(=), :(.=))
-        # don't use view for ref on the lhs of an assignment,
-        # but still use views for the args of the ref:
-        lhs = ex.args[1]
-        Expr(ex.head, Meta.isexpr(lhs, :ref) ?
-                      Expr(:ref, _views.(lhs.args)...) : _views(lhs),
-             _views(ex.args[2]))
-    elseif ex.head == :ref
-        Expr(:call, maybeview, _views.(ex.args)...)
-    else
-        h = string(ex.head)
-        # don't use view on the lhs of an op-assignment a[i...] += ...
-        if last(h) == '=' && Meta.isexpr(ex.args[1], :ref)
-            lhs = ex.args[1]
-
-            # temp vars to avoid recomputing a and i,
-            # which will be assigned in a let block:
-            a = gensym(:a)
-            i = [gensym(:i) for k = 1:length(lhs.args)-1]
-
-            # for splatted indices like a[i, j...], we need to
-            # splat the corresponding temp var.
-            I = similar(i, Any)
-            for k = 1:length(i)
-                if Meta.isexpr(lhs.args[k+1], :...)
-                    I[k] = Expr(:..., i[k])
-                    lhs.args[k+1] = lhs.args[k+1].args[1] # unsplat
-                else
-                    I[k] = i[k]
-                end
-            end
-
-            Expr(:let,
-                 Expr(:block,
-                      :($a = $(_views(lhs.args[1]))),
-                      [:($(i[k]) = $(_views(lhs.args[k+1]))) for k=1:length(i)]...),
-                 Expr(first(h) == '.' ? :(.=) : :(=), :($a[$(I...)]),
-                      Expr(:call, Symbol(h[1:end-1]),
-                           :($maybeview($a, $(I...))),
-                           _views.(ex.args[2:end])...)))
-        else
-            Expr(ex.head, _views.(ex.args)...)
-        end
-    end
-end
-
-"""
-    @views expression
-
-Convert every array-slicing operation in the given expression
-(which may be a `begin`/`end` block, loop, function, etc.)
-to return a view. Scalar indices, non-array types, and
-explicit `getindex` calls (as opposed to `array[...]`) are
-unaffected.
-
-!!! note
-    The `@views` macro only affects `array[...]` expressions
-    that appear explicitly in the given `expression`, not array slicing that
-    occurs in functions called by that code.
-
-# Examples
-```jldoctest
-julia> A = zeros(3, 3);
-
-julia> @views for row in 1:3
-           b = A[row, :]
-           b[:] = row
-       end
-
-julia> A
-3×3 Array{Float64,2}:
- 1.0  1.0  1.0
- 2.0  2.0  2.0
- 3.0  3.0  3.0
-```
-"""
-macro views(x)
-    esc(_views(replace_ref_end!(x)))
-end
diff --git a/base/sysimg.jl b/base/sysimg.jl
index 8750bfb0dd366..03103f5ad15d4 100644
--- a/base/sysimg.jl
+++ b/base/sysimg.jl
@@ -147,6 +147,7 @@ include("indices.jl")
 include("array.jl")
 include("abstractarray.jl")
 include("subarray.jl")
+include("views.jl")
 include("reinterpretarray.jl")
 
 
diff --git a/base/views.jl b/base/views.jl
new file mode 100644
index 0000000000000..e49d149e940ec
--- /dev/null
+++ b/base/views.jl
@@ -0,0 +1,212 @@
+"""
+    replace_ref_end!(ex)
+
+Recursively replace occurrences of the symbol :end in a "ref" expression (i.e. A[...]) `ex`
+with the appropriate function calls (`lastindex` or `size`). Replacement uses
+the closest enclosing ref, so
+
+    A[B[end]]
+
+should transform to
+
+    A[B[lastindex(B)]]
+
+"""
+replace_ref_end!(ex) = replace_ref_end_!(ex, nothing)[1]
+# replace_ref_end_!(ex,withex) returns (new ex, whether withex was used)
+function replace_ref_end_!(ex, withex)
+    used_withex = false
+    if isa(ex,Symbol) && ex == :end
+        withex === nothing && error("Invalid use of end")
+        return withex, true
+    elseif isa(ex,Expr)
+        if ex.head == :ref
+            ex.args[1], used_withex = replace_ref_end_!(ex.args[1],withex)
+            S = isa(ex.args[1],Symbol) ? ex.args[1]::Symbol : gensym(:S) # temp var to cache ex.args[1] if needed
+            used_S = false # whether we actually need S
+            # new :ref, so redefine withex
+            nargs = length(ex.args)-1
+            if nargs == 0
+                return ex, used_withex
+            elseif nargs == 1
+                # replace with lastindex(S)
+                ex.args[2], used_S = replace_ref_end_!(ex.args[2],:($lastindex($S)))
+            else
+                n = 1
+                J = lastindex(ex.args)
+                for j = 2:J
+                    exj, used = replace_ref_end_!(ex.args[j],:($lastindex($S,$n)))
+                    used_S |= used
+                    ex.args[j] = exj
+                    if isa(exj,Expr) && exj.head == :...
+                        # splatted object
+                        exjs = exj.args[1]
+                        n = :($n + length($exjs))
+                    elseif isa(n, Expr)
+                        # previous expression splatted
+                        n = :($n + 1)
+                    else
+                        # an integer
+                        n += 1
+                    end
+                end
+            end
+            if used_S && S !== ex.args[1]
+                S0 = ex.args[1]
+                ex.args[1] = S
+                ex = Expr(:let, :($S = $S0), ex)
+            end
+        else
+            # recursive search
+            for i = eachindex(ex.args)
+                ex.args[i], used = replace_ref_end_!(ex.args[i],withex)
+                used_withex |= used
+            end
+        end
+    end
+    ex, used_withex
+end
+
+"""
+    @view A[inds...]
+
+Creates a `SubArray` from an indexing expression. This can only be applied directly to a
+reference expression (e.g. `@view A[1,2:end]`), and should *not* be used as the target of
+an assignment (e.g. `@view(A[1,2:end]) = ...`).  See also [`@views`](@ref)
+to switch an entire block of code to use views for slicing.
+
+```jldoctest
+julia> A = [1 2; 3 4]
+2×2 Array{Int64,2}:
+ 1  2
+ 3  4
+
+julia> b = @view A[:, 1]
+2-element view(::Array{Int64,2}, :, 1) with eltype Int64:
+ 1
+ 3
+
+julia> fill!(b, 0)
+2-element view(::Array{Int64,2}, :, 1) with eltype Int64:
+ 0
+ 0
+
+julia> A
+2×2 Array{Int64,2}:
+ 0  2
+ 0  4
+```
+"""
+macro view(ex)
+    if Meta.isexpr(ex, :ref)
+        ex = replace_ref_end!(ex)
+        if Meta.isexpr(ex, :ref)
+            ex = Expr(:call, view, ex.args...)
+        else # ex replaced by let ...; foo[...]; end
+            @assert Meta.isexpr(ex, :let) && Meta.isexpr(ex.args[2], :ref)
+            ex.args[2] = Expr(:call, view, ex.args[2].args...)
+        end
+        Expr(:&&, true, esc(ex))
+    else
+        throw(ArgumentError("Invalid use of @view macro: argument must be a reference expression A[...]."))
+    end
+end
+
+############################################################################
+# @views macro code:
+
+# maybeview is like getindex, but returns a view for slicing operations
+# (while remaining equivalent to getindex for scalar indices and non-array types)
+@propagate_inbounds maybeview(A, args...) = getindex(A, args...)
+@propagate_inbounds maybeview(A::AbstractArray, args...) = view(A, args...)
+@propagate_inbounds maybeview(A::AbstractArray, args::Number...) = getindex(A, args...)
+@propagate_inbounds maybeview(A) = getindex(A)
+@propagate_inbounds maybeview(A::AbstractArray) = getindex(A)
+
+# _views implements the transformation for the @views macro.
+# @views calls esc(_views(...)) to work around #20241,
+# so any function calls we insert (to maybeview, or to
+# lastindex in replace_ref_end!) must be interpolated
+# as values rather than as symbols to ensure that they are called
+# from Base rather than from the caller's scope.
+_views(x) = x
+function _views(ex::Expr)
+    if ex.head in (:(=), :(.=))
+        # don't use view for ref on the lhs of an assignment,
+        # but still use views for the args of the ref:
+        lhs = ex.args[1]
+        Expr(ex.head, Meta.isexpr(lhs, :ref) ?
+                      Expr(:ref, _views.(lhs.args)...) : _views(lhs),
+             _views(ex.args[2]))
+    elseif ex.head == :ref
+        Expr(:call, maybeview, _views.(ex.args)...)
+    else
+        h = string(ex.head)
+        # don't use view on the lhs of an op-assignment a[i...] += ...
+        if last(h) == '=' && Meta.isexpr(ex.args[1], :ref)
+            lhs = ex.args[1]
+
+            # temp vars to avoid recomputing a and i,
+            # which will be assigned in a let block:
+            a = gensym(:a)
+            i = [gensym(:i) for k = 1:length(lhs.args)-1]
+
+            # for splatted indices like a[i, j...], we need to
+            # splat the corresponding temp var.
+            I = similar(i, Any)
+            for k = 1:length(i)
+                if Meta.isexpr(lhs.args[k+1], :...)
+                    I[k] = Expr(:..., i[k])
+                    lhs.args[k+1] = lhs.args[k+1].args[1] # unsplat
+                else
+                    I[k] = i[k]
+                end
+            end
+
+            Expr(:let,
+                 Expr(:block,
+                      :($a = $(_views(lhs.args[1]))),
+                      [:($(i[k]) = $(_views(lhs.args[k+1]))) for k=1:length(i)]...),
+                 Expr(first(h) == '.' ? :(.=) : :(=), :($a[$(I...)]),
+                      Expr(:call, Symbol(h[1:end-1]),
+                           :($maybeview($a, $(I...))),
+                           _views.(ex.args[2:end])...)))
+        else
+            Expr(ex.head, _views.(ex.args)...)
+        end
+    end
+end
+
+"""
+    @views expression
+
+Convert every array-slicing operation in the given expression
+(which may be a `begin`/`end` block, loop, function, etc.)
+to return a view. Scalar indices, non-array types, and
+explicit `getindex` calls (as opposed to `array[...]`) are
+unaffected.
+
+!!! note
+    The `@views` macro only affects `array[...]` expressions
+    that appear explicitly in the given `expression`, not array slicing that
+    occurs in functions called by that code.
+
+# Examples
+```jldoctest
+julia> A = zeros(3, 3);
+
+julia> @views for row in 1:3
+           b = A[row, :]
+           b[:] = row
+       end
+
+julia> A
+3×3 Array{Float64,2}:
+ 1.0  1.0  1.0
+ 2.0  2.0  2.0
+ 3.0  3.0  3.0
+```
+"""
+macro views(x)
+    esc(_views(replace_ref_end!(x)))
+end
diff --git a/doc/src/devdocs/ssair.md b/doc/src/devdocs/ssair.md
new file mode 100644
index 0000000000000..e74763919cee5
--- /dev/null
+++ b/doc/src/devdocs/ssair.md
@@ -0,0 +1,100 @@
+# Julia SSA-form IR
+
+## Background
+
+Beginning in Julia 0.7, parts of the compiler use a new [SSA-form](https://en.wikipedia.org/wiki/Static_single_assignment_form)
+intermediate representation. Historically, the compiler used to directly generate LLVM IR, from a lowered form of the Julia
+AST. This form had most syntactic abstractions removed, but still looked a lot like an abstract syntax tree.
+Over time, in order to facilitate optimizations, SSA values were introduced to this IR and the IR was
+linearized (i.e. a form where function arguments may only be SSA values or constants). However, non-ssa values
+(slots) remained in the IR due to the lack of Phi nodes in the IR (necessary for back-edges and re-merging of
+conditional control flow), negating much of the usefulness of the SSA form representation to perform
+middle end optimizations. Some heroic effort was put into making these optimizations work without a complete SSA
+form representation, but the lack of such a representation ultimately proved prohibitive.
+
+## New IR nodes
+
+With the new IR representation, the compiler learned to handle two new IR nodes, Phi nodes and Pi
+nodes. Phi nodes are part of the generic SSA abstraction (see the link above if you're not familiar with
+the concept). In the Julia IR, these nodes are represented as:
+```
+struct PhiNode
+    edges::Vector{Int}
+    values::Vector{Any}
+end
+```
+where we ensure that both vectors always have the same length. In the canonical representation (the one
+handled by codegen and the interpreter), the edge values indicate come-from statement numbers (i.e.
+if `edges` has an entry of `15`, there must be a `goto`, `gotoifnot` or implicit fall through from
+statement `15` that targets this phi node). Values are either SSA values or constants. It is also
+possible for a value to be unassigned if the variable was not defined on this path. However, undefinedness
+checks get explicitly inserted and represented as booleans after middle end optimizations, so code generators
+may assume that any use of a phi node will have an assigned value in the corresponding slot. It is also legal
+for the mapping to be incomplete, i.e. for a phi node to have missing incoming edges. In that case, it must
+be dynamically guaranteed that the corresponding value will not be used.
+
+PiNodes encode statically proven information that may be implicitly assumed in basic blocks dominated by a given
+pi node. They are conceptually equivalent to the technique introduced in the paper
+"ABCD: Eliminating Array Bounds Checks on Demand" or the predicate info nodes in LLVM. To see how they work, consider,
+e.g.
+
+```
+%x::Union{Int, Float64} # %x is some Union{Int, Float64} typed ssa value
+if isa(x, Int)
+    # use x
+else
+    # use x
+end
+```
+
+we can perform predicate insertion and turn this into:
+
+```
+%x::Union{Int, Float64} # %x is some Union{Int, Float64} typed ssa value
+if isa(x, Int)
+    %x_int = PiNode(x, Int)
+    # use %x_int
+else
+    %x_float = PiNode(x, Float64)
+    # use %x_float
+end
+```
+
+Pi nodes are generally ignored in the interpreter, since they don't have any effect on the values,
+but they may sometimes lead to code generation in the compiler (e.g. to change from an implicitly
+union split representation to a plain unboxed representation). The main usefulness of PiNodes stems
+from the fact that path conditions of the values can be accumulated simply by def-use chain walking
+that is generally done for most optimizations that care about these conditions anyway.
+
+## Main SSA data structure
+
+The main `SSAIR` data structure is worthy of discussion. It draws inspiration from LLVM and Webkit's B3 IR.
+The core of the data structure is a flat vector of statements. Each statement is implicitly assigned
+an SSA value based on its position in the vector (i.e. the result of the statement at idx 1 can be
+accessed using `SSAValue(1)` etc). For each SSA value, we additionally maintain its type. Since SSA values
+are definitionally assigned only once, this type is also the result type of the expression at the corresponding
+index. However, while this representation is rather efficient (since the assignments don't need to be explicitly
+encoded), it of course carries the drawback that order is semantically significant, so reorderings and insertions
+change statement numbers. Additionally, we do not keep use lists (i.e. it is impossible to walk from a def to
+all its uses without explicitly computing this map - def lists however are trivial since you can lookup the
+corresponding statement from the index), so the LLVM-style RAUW (replace-all-uses-with) operation is unavailable.
+
+Instead, we do the following:
+    - We keep a separate buffer of nodes to insert (including the position to insert them at, the type of the
+      corresponding value and the node itself). These nodes are numbered by their occurrence in the insertion
+      buffer, allowing their values to be immediately used elsewhere in the IR (i.e. if there are 12 statements in
+      the original statement list, the first new statement will be accessible as `SSAValue(13)`)
+    - RAUW style operations are performed by setting the corresponding statement index to the replacement
+      value.
+    - Statements are erased by setting the corresponding statement to `nothing` (this is essentially just a special-case convention of the above - if there are any uses of the statement being erased they will be set to `nothing`)
+
+There is a `compact!` function that compacts the above data structure by performing the insertion of nodes in the appropriate place, trivial copy propagation and renaming of uses to any changed SSA values. However, the clever part
+of this scheme is that this compaction can be done lazily as part of the subsequent pass. Most optimization passes
+need to walk over the entire list of statements, performing analysis or modifications along the way. We provide an
+`IncrementalCompact` iterator that can be used to iterate over the statement list. It will perform any necessary compaction,
+and return the new index of the node, as well as the node itself. It is legal at this point to walk def-use chains,
+as well as make any modifications or deletions to the IR (insertions are disallowed however).
+
+The idea behind this arrangement is that, since the optimization passes need to touch the corresponding memory anyway,
+and incur the corresponding memory access penalty, performing the extra housekeeping should have comparatively little
+overhead (and save the overhead of maintaining these data structures during IR modification).
diff --git a/src/ast.c b/src/ast.c
index a44cc545b55cc..d6c8968350187 100644
--- a/src/ast.c
+++ b/src/ast.c
@@ -63,6 +63,7 @@ jl_sym_t *macrocall_sym; jl_sym_t *colon_sym;
 jl_sym_t *hygienicscope_sym;
 jl_sym_t *escape_sym;
 jl_sym_t *gc_preserve_begin_sym; jl_sym_t *gc_preserve_end_sym;
+jl_sym_t *throw_undef_if_not_sym;
 
 static uint8_t flisp_system_image[] = {
 #include <julia_flisp.boot.inc>
@@ -383,6 +384,7 @@ void jl_init_frontend(void)
     gc_preserve_end_sym = jl_symbol("gc_preserve_end");
     generated_sym = jl_symbol("generated");
     generated_only_sym = jl_symbol("generated_only");
+    throw_undef_if_not_sym = jl_symbol("throw_undef_if_not");
 }
 
 JL_DLLEXPORT void jl_lisp_prompt(void)
diff --git a/src/builtins.c b/src/builtins.c
index 3f966bdd3fd66..ccbcff38b953a 100644
--- a/src/builtins.c
+++ b/src/builtins.c
@@ -1275,6 +1275,8 @@ void jl_init_primitives(void)
     add_builtin("LineNumberNode", (jl_value_t*)jl_linenumbernode_type);
     add_builtin("LabelNode", (jl_value_t*)jl_labelnode_type);
     add_builtin("GotoNode", (jl_value_t*)jl_gotonode_type);
+    add_builtin("PiNode", (jl_value_t*)jl_pinode_type);
+    add_builtin("PhiNode", (jl_value_t*)jl_phinode_type);
     add_builtin("QuoteNode", (jl_value_t*)jl_quotenode_type);
     add_builtin("NewvarNode", (jl_value_t*)jl_newvarnode_type);
     add_builtin("GlobalRef", (jl_value_t*)jl_globalref_type);
diff --git a/src/cgutils.cpp b/src/cgutils.cpp
index 51eb820802343..acc630914db82 100644
--- a/src/cgutils.cpp
+++ b/src/cgutils.cpp
@@ -2060,7 +2060,7 @@ static Value *compute_box_tindex(jl_codectx_t &ctx, Value *datatype, jl_value_t
     return tindex;
 }
 
-// get the runtime tindex value
+// get the runtime tindex value, assuming val is already converted to type typ if it has a TIndex
 static Value *compute_tindex_unboxed(jl_codectx_t &ctx, const jl_cgval_t &val, jl_value_t *typ)
 {
     if (val.typ == jl_bottom_type)
@@ -2069,8 +2069,50 @@ static Value *compute_tindex_unboxed(jl_codectx_t &ctx, const jl_cgval_t &val, j
         return ConstantInt::get(T_int8, get_box_tindex((jl_datatype_t*)jl_typeof(val.constant), typ));
     if (val.isboxed)
         return compute_box_tindex(ctx, emit_typeof_boxed(ctx, val), val.typ, typ);
-    assert(val.TIndex);
-    return ctx.builder.CreateAnd(val.TIndex, ConstantInt::get(T_int8, 0x7f));
+    if (val.TIndex)
+        return ctx.builder.CreateAnd(val.TIndex, ConstantInt::get(T_int8, 0x7f));
+    return compute_box_tindex(ctx, emit_typeof_boxed(ctx, val), val.typ, typ);
+}
+
+static void union_alloca_type(jl_uniontype_t *ut,
+        bool &allunbox, size_t &nbytes, size_t &align, size_t &min_align)
+{
+    nbytes = 0;
+    align = 0;
+    min_align = MAX_ALIGN;
+    // compute the size of the union alloca that could hold this type
+    unsigned counter = 0;
+    allunbox = for_each_uniontype_small(
+            [&](unsigned idx, jl_datatype_t *jt) {
+                if (!jl_is_datatype_singleton(jt)) {
+                    size_t nb1 = jl_datatype_size(jt);
+                    size_t align1 = jl_datatype_align(jt);
+                    if (nb1 > nbytes)
+                        nbytes = nb1;
+                    if (align1 > align)
+                        align = align1;
+                    if (align1 < min_align)
+                        min_align = align1;
+                }
+            },
+            (jl_value_t*)ut,
+            counter);
+}
+
+static Value *try_emit_union_alloca(jl_codectx_t &ctx, jl_uniontype_t *ut, bool &allunbox, size_t &min_align)
+{
+    size_t nbytes, align;
+    union_alloca_type(ut, allunbox, nbytes, align, min_align);
+    if (nbytes > 0) {
+        // at least some of the values can live on the stack
+        // try to pick an Integer type size such that SROA will emit reasonable code
+        Type *AT = ArrayType::get(IntegerType::get(jl_LLVMContext, 8 * min_align), (nbytes + min_align - 1) / min_align);
+        AllocaInst *lv = emit_static_alloca(ctx, AT);
+        if (align > 1)
+            lv->setAlignment(align);
+        return lv;
+    }
+    return NULL;
 }
 
 /*
diff --git a/src/codegen.cpp b/src/codegen.cpp
index 4a14fbec6a4a7..afeadcae3c8f0 100644
--- a/src/codegen.cpp
+++ b/src/codegen.cpp
@@ -64,6 +64,7 @@
 #include <llvm/Support/FormattedStream.h>
 #include <llvm/Support/SourceMgr.h> // for llvmcall
 #include <llvm/Transforms/Utils/Cloning.h> // for llvmcall inlining
+#include <llvm/Transforms/Utils/BasicBlockUtils.h>
 #include <llvm/IR/Verifier.h> // for llvmcall validation
 #if JL_LLVM_VERSION >= 40000
 #  include <llvm/Bitcode/BitcodeWriter.h>
@@ -387,7 +388,7 @@ struct jl_cgval_t {
     // For unions, we may need to keep a reference to the boxed part individually.
     // If this is non-NULL, then, at runtime, we satisfy the invariant that (for the corresponding
     // runtime values) if `(TIndex | 0x80) != 0`, then `Vboxed == V` (by value).
-    // For conenience, we also set this value of isboxed values, in which case
+    // For convenience, we also set this value of isboxed values, in which case
     // it is equal (at compile time) to V.
     Value *Vboxed;
     Value *TIndex; // if `V` is an unboxed (tagged) Union described by `typ`, this gives the DataType index (1-based, small int) as an i8
@@ -527,6 +528,7 @@ class jl_codectx_t {
     // local var info. globals are not in here.
     std::vector<jl_varinfo_t> slots;
     std::vector<jl_cgval_t> SAvalues;
+    std::vector<std::tuple<jl_cgval_t, PHINode *, jl_value_t *>> PhiNodes;
     std::vector<bool> ssavalue_assigned;
     std::map<int, jl_arrayvar_t> *arrayvars = NULL;
     jl_module_t *module = NULL;
@@ -1970,7 +1972,6 @@ static void simple_use_analysis(jl_codectx_t &ctx, jl_value_t *expr)
     }
     else if (jl_is_expr(expr)) {
         jl_expr_t *e = (jl_expr_t*)expr;
-        size_t i;
         if (e->head == method_sym) {
             simple_use_analysis(ctx, jl_exprarg(e, 0));
             if (jl_expr_nargs(e) > 1) {
@@ -1983,12 +1984,24 @@ static void simple_use_analysis(jl_codectx_t &ctx, jl_value_t *expr)
             simple_use_analysis(ctx, jl_exprarg(e, 1));
         }
         else {
-            size_t elen = jl_array_dim0(e->args);
+            size_t i, elen = jl_array_dim0(e->args);
             for (i = 0; i < elen; i++) {
                 simple_use_analysis(ctx, jl_exprarg(e, i));
             }
         }
     }
+    else if (jl_is_pinode(expr)) {
+        simple_use_analysis(ctx, jl_fieldref_noalloc(expr, 0));
+    }
+    else if (jl_is_phinode(expr)) {
+        jl_array_t *values = (jl_array_t*)jl_fieldref_noalloc(expr, 1);
+        size_t i, elen = jl_array_len(values);
+        for (i = 0; i < elen; i++) {
+            jl_value_t *v = jl_array_ptr_ref(values, i);
+            if (v)
+                simple_use_analysis(ctx, v);
+        }
+    }
 }
 
 // --- gc root utils ---
@@ -2267,8 +2280,10 @@ static Value *emit_f_is(jl_codectx_t &ctx, const jl_cgval_t &arg1, const jl_cgva
         Value *varg2 = arg2.constant ? literal_pointer_val(ctx, arg2.constant) : arg2.V;
         assert(varg1 && varg2 && (arg1.isboxed || arg1.TIndex) && (arg2.isboxed || arg2.TIndex) &&
                 "Only boxed types are valid for pointer comparison.");
-        return ctx.builder.CreateICmpEQ(decay_derived(varg1),
-                                        decay_derived(varg2));
+        varg1 = decay_derived(varg1);
+        varg2 = decay_derived(varg2);
+        return ctx.builder.CreateICmpEQ(emit_bitcast(ctx, varg1, T_pint8),
+                                        emit_bitcast(ctx, varg2, T_pint8));
     }
 
     Value *varg1 = mark_callee_rooted(boxed(ctx, arg1));
@@ -3371,48 +3386,6 @@ static jl_cgval_t emit_local(jl_codectx_t &ctx, jl_value_t *slotload)
     return v;
 }
 
-
-static void union_alloca_type(jl_uniontype_t *ut,
-        bool &allunbox, size_t &nbytes, size_t &align, size_t &min_align)
-{
-    nbytes = 0;
-    align = 0;
-    min_align = MAX_ALIGN;
-    // compute the size of the union alloca that could hold this type
-    unsigned counter = 0;
-    allunbox = for_each_uniontype_small(
-            [&](unsigned idx, jl_datatype_t *jt) {
-                if (!jl_is_datatype_singleton(jt)) {
-                    size_t nb1 = jl_datatype_size(jt);
-                    size_t align1 = jl_datatype_align(jt);
-                    if (nb1 > nbytes)
-                        nbytes = nb1;
-                    if (align1 > align)
-                        align = align1;
-                    if (align1 < min_align)
-                        min_align = align1;
-                }
-            },
-            (jl_value_t*)ut,
-            counter);
-}
-
-static Value *try_emit_union_alloca(jl_codectx_t &ctx, jl_uniontype_t *ut, bool &allunbox, size_t &min_align)
-{
-    size_t nbytes, align;
-    union_alloca_type(ut, allunbox, nbytes, align, min_align);
-    if (nbytes > 0) {
-        // at least some of the values can live on the stack
-        // try to pick an Integer type size such that SROA will emit reasonable code
-        Type *AT = ArrayType::get(IntegerType::get(jl_LLVMContext, 8 * min_align), (nbytes + min_align - 1) / min_align);
-        AllocaInst *lv = emit_static_alloca(ctx, AT);
-        if (align > 1)
-            lv->setAlignment(align);
-        return lv;
-    }
-    return NULL;
-}
-
 static void emit_vi_assignment_unboxed(jl_codectx_t &ctx, jl_varinfo_t &vi, Value *isboxed, jl_cgval_t rval_info)
 {
     if (vi.usedUndef)
@@ -3478,9 +3451,94 @@ static void emit_vi_assignment_unboxed(jl_codectx_t &ctx, jl_varinfo_t &vi, Valu
     }
 }
 
+static void emit_phinode_assign(jl_codectx_t &ctx, jl_value_t *l, jl_value_t *r)
+{ // Lowers `l = r` where `l` is an SSAValue and `r` a PhiNode: creates the LLVM phi(s) and queues `r` in ctx.PhiNodes; no incoming values are added here.
+    ssize_t idx = ((jl_ssavalue_t*)l)->id; // number of the SSA value being defined
+    assert(idx >= 0);
+    assert(!ctx.ssavalue_assigned.at(idx));
+    jl_value_t *ssavalue_types = (jl_value_t*)ctx.source->ssavaluetypes;
+    assert(jl_is_array(ssavalue_types));
+    jl_array_t *edges = (jl_array_t*)jl_fieldref_noalloc(r, 0); // field 0 of the PhiNode; only its length is used here
+    jl_value_t *phiType = jl_array_ptr_ref(ssavalue_types, idx); // type recorded for this SSA value
+    BasicBlock *BB = ctx.builder.GetInsertBlock();
+    auto InsertPt = BB->getFirstInsertionPt(); // phis are inserted before this point rather than at the builder's position
+    if (phiType == jl_bottom_type) { // nothing is emitted; the SSA value is left unassigned
+        return;
+    }
+    if (jl_is_uniontype(phiType)) { // try a split representation (type index + data) before falling back
+        bool allunbox;
+        size_t min_align;
+        Value *dest = try_emit_union_alloca(ctx, ((jl_uniontype_t*)phiType), allunbox, min_align); // stack slot for unboxed members, or NULL
+        Value *ptr = NULL;
+        if (dest) { // a stack slot exists: phi both the type index and a boxed pointer
+            PHINode *Tindex_phi = PHINode::Create(T_int8, jl_array_len(edges), "tindex_phi");
+            BB->getInstList().insert(InsertPt, Tindex_phi);
+            PHINode *ptr_phi = PHINode::Create(T_prjlvalue, jl_array_len(edges), "ptr_phi");
+            BB->getInstList().insert(InsertPt, ptr_phi);
+            Value *isboxed = ctx.builder.CreateICmpNE( // bit 0x80 of the type index marks the boxed case
+                    ctx.builder.CreateAnd(Tindex_phi, ConstantInt::get(T_int8, 0x80)),
+                    ConstantInt::get(T_int8, 0));
+            ptr = ctx.builder.CreateSelect(isboxed, // data pointer: ptr_phi when boxed, otherwise the stack slot
+                maybe_bitcast(ctx, decay_derived(ptr_phi), T_pint8),
+                maybe_bitcast(ctx, decay_derived(dest), T_pint8));
+            jl_cgval_t val = mark_julia_slot(ptr, phiType, Tindex_phi, tbaa_stack);
+            val.Vboxed = ptr_phi;
+            ctx.PhiNodes.push_back(std::make_tuple(val, ptr_phi, r)); // queued as (result, value phi, PhiNode)
+            ctx.SAvalues.at(idx) = val;
+            ctx.ssavalue_assigned.at(idx) = true;
+            return;
+        }
+        else if (allunbox) { // no stack slot was produced: only the type index is phi'd
+            PHINode *Tindex_phi = PHINode::Create(T_int8, jl_array_len(edges), "tindex_phi");
+            BB->getInstList().insert(InsertPt, Tindex_phi);
+            jl_cgval_t val = mark_julia_slot(NULL, phiType, Tindex_phi, tbaa_stack);
+            ctx.PhiNodes.push_back(std::make_tuple(val, (PHINode*)NULL, r)); // queued with a NULL value phi
+            ctx.SAvalues.at(idx) = val;
+            ctx.ssavalue_assigned.at(idx) = true;
+            return;
+        }
+    } // any other union falls through to the generic path below
+    bool isboxed;
+    Type *vtype = julia_type_to_llvm(phiType, &isboxed);
+    if (isboxed)
+        vtype = T_prjlvalue;
+    // A phi whose LLVM type is a ghost type has no data to merge. The frontend
+    // should really not emit this, but we allow it for convenience.
+    if (type_is_ghost(vtype)) {
+        assert(jl_is_datatype(phiType) && ((jl_datatype_t*)phiType)->instance);
+        // No LLVM phi was created, so there is nothing to add to ctx.PhiNodes.
+        ctx.SAvalues.at(idx) = mark_julia_const(((jl_datatype_t*)phiType)->instance);
+        ctx.ssavalue_assigned.at(idx) = true;
+        return;
+    }
+    jl_cgval_t slot;
+    PHINode *value_phi = NULL;
+    if (vtype->isAggregateType()) { // aggregates: phi a pointer, then copy into a local slot
+        value_phi = PHINode::Create(vtype->getPointerTo(AddressSpace::Derived),
+                jl_array_len(edges), "value_phi");
+        BB->getInstList().insert(InsertPt, value_phi);
+        Value *alloc = emit_static_alloca(ctx, vtype);
+        ctx.builder.CreateMemCpy(alloc, value_phi, jl_datatype_size(phiType), // memcpy from the phi'd pointer into `alloc`
+            jl_datatype_align(phiType), false);
+        slot = mark_julia_slot(alloc, phiType, NULL, tbaa_stack);
+    }
+    else { // everything else (including boxed values) is phi'd directly as `vtype`
+        value_phi = PHINode::Create(vtype, jl_array_len(edges), "value_phi");
+        BB->getInstList().insert(InsertPt, value_phi);
+        slot = mark_julia_type(ctx, value_phi, isboxed, phiType);
+    }
+    ctx.PhiNodes.push_back(std::make_tuple(slot, value_phi, r)); // queued as (result, value phi, PhiNode)
+    ctx.SAvalues.at(idx) = slot;
+    ctx.ssavalue_assigned.at(idx) = true;
+    return;
+}
+
 static void emit_assignment(jl_codectx_t &ctx, jl_value_t *l, jl_value_t *r)
 {
     if (jl_is_ssavalue(l)) {
+        if (jl_is_phinode(r)) {
+            return emit_phinode_assign(ctx, l, r);
+        }
         ssize_t idx = ((jl_ssavalue_t*)l)->id;
         assert(idx >= 0);
         assert(!ctx.ssavalue_assigned.at(idx));
@@ -3600,7 +3658,7 @@ static void emit_assignment(jl_codectx_t &ctx, jl_value_t *l, jl_value_t *r)
             tindex = compute_tindex_unboxed(ctx, rval_info, vi.value.typ);
             if (vi.boxroot)
                 tindex = ctx.builder.CreateOr(tindex, ConstantInt::get(T_int8, 0x80));
-            if (!vi.boxroot)
+            else
                 rval_info.TIndex = tindex;
         }
         ctx.builder.CreateStore(tindex, vi.pTIndex, vi.isVolatile);
@@ -3747,6 +3805,9 @@ static jl_cgval_t emit_expr(jl_codectx_t &ctx, jl_value_t *expr)
     if (jl_is_gotonode(expr)) {
         jl_error("GotoNode in value position");
     }
+    if (jl_is_pinode(expr)) {
+        return convert_julia_type(ctx, emit_expr(ctx, jl_fieldref_noalloc(expr, 0)), jl_fieldref_noalloc(expr, 1));
+    }
     if (!jl_is_expr(expr)) {
         int needroot = true;
         if (jl_is_quotenode(expr)) {
@@ -3784,6 +3845,11 @@ static jl_cgval_t emit_expr(jl_codectx_t &ctx, jl_value_t *expr)
     if (head == isdefined_sym) {
         return emit_isdefined(ctx, args[0]);
     }
+    else if (head == throw_undef_if_not_sym) {
+        Value *cond = emit_unbox(ctx, T_int8, emit_expr(ctx, args[1]), (jl_value_t*)jl_bool_type);
+        undef_var_error_ifnot(ctx, ctx.builder.CreateTrunc(cond, T_int1), (jl_sym_t*)args[0]);
+        return ghostValue(jl_void_type);
+    }
     else if (head == invoke_sym) {
         return emit_invoke(ctx, ex);
     }
@@ -5622,25 +5688,16 @@ static std::unique_ptr<Module> emit_function(
             }
             return bb;
         }
-        // If this is a label node in an empty bb
-        if (lname == cursor + 1 && cur_bb->begin() == cur_bb->end()) {
-            assert(unconditional);
-            // Use this bb as the one for the new label.
-            bb = cur_bb;
+        // use the label name as the BB name.
+        bb = BasicBlock::Create(jl_LLVMContext,
+                                "L" + std::to_string(lname), f);
+        if (unconditional) {
+           if (!cur_bb->getTerminator())
+               ctx.builder.CreateBr(bb);
+           ctx.builder.SetInsertPoint(bb);
         }
         else {
-            // Otherwise, create a new BB
-            // use the label name as the BB name.
-            bb = BasicBlock::Create(jl_LLVMContext,
-                                    "L" + std::to_string(lname), f);
-            if (unconditional) {
-                if (!cur_bb->getTerminator())
-                    ctx.builder.CreateBr(bb);
-                ctx.builder.SetInsertPoint(bb);
-            }
-            else {
-                add_to_list(lname, bb);
-            }
+           add_to_list(lname, bb);
         }
         if (unconditional)
             find_next_stmt(lname);
@@ -5658,6 +5715,9 @@ static std::unique_ptr<Module> emit_function(
                 (malloc_log_mode == JL_LOG_USER && in_user_code));
     };
 
+    std::map<size_t, BasicBlock*> come_from_bb;
+    come_from_bb[0] = ctx.builder.GetInsertBlock();
+
     // Handle the implicit first line number node.
     if (ctx.debug_enabled)
         ctx.builder.SetCurrentDebugLocation(topdebugloc);
@@ -5778,6 +5838,7 @@ static std::unique_ptr<Module> emit_function(
         }
         if (jl_is_gotonode(stmt)) {
             int lname = jl_gotonode_label(stmt);
+            come_from_bb[cursor+1] = ctx.builder.GetInsertBlock();
             handle_label(lname, true);
             continue;
         }
@@ -5786,17 +5847,23 @@ static std::unique_ptr<Module> emit_function(
             jl_value_t *cond = args[0];
             int lname = jl_unbox_long(args[1]);
             Value *isfalse = emit_condition(ctx, cond, "if");
+            come_from_bb[cursor+1] = ctx.builder.GetInsertBlock();
             if (do_malloc_log(props.in_user_code) && props.line != -1)
                 mallocVisitLine(ctx, props.file, props.line);
-            BasicBlock *ifso = BasicBlock::Create(jl_LLVMContext, "if", f);
+            bool next_is_label = jl_is_labelnode(jl_array_ptr_ref(stmts, cursor+1));
             BasicBlock *ifnot = handle_label(lname, false);
+            BasicBlock *ifso = next_is_label ? handle_label(cursor+2, false) : BasicBlock::Create(jl_LLVMContext, "if", f);
             // Any branches treated as constant in type inference should be
             // eliminated before running
             ctx.builder.CreateCondBr(isfalse, ifnot, ifso);
-            ctx.builder.SetInsertPoint(ifso);
+            if (!next_is_label)
+                ctx.builder.SetInsertPoint(ifso);
+            find_next_stmt(next_is_label ? -1 : cursor+1);
+            continue;
         }
         else if (expr && expr->head == enter_sym) {
             jl_value_t **args = (jl_value_t**)jl_array_data(expr->args);
+
             assert(jl_is_long(args[0]));
             int lname = jl_unbox_long(args[0]);
             CallInst *sj = ctx.builder.CreateCall(prepare_call(except_enter_func));
@@ -5828,11 +5895,216 @@ static std::unique_ptr<Module> emit_function(
                 mallocVisitLine(ctx, props.file, props.line);
             }
         }
+        if (cursor + 1 < jl_array_len(stmts) && jl_is_labelnode(jl_array_ptr_ref(stmts, cursor+1)))
+            come_from_bb[cursor+1] = ctx.builder.GetInsertBlock();
         find_next_stmt(cursor + 1);
     }
     ctx.builder.SetCurrentDebugLocation(noDbg);
     ctx.builder.ClearInsertionPoint();
 
+    // We don't visit empty labels, but they can still be implicit terminators,
+    // just add them to the list
+    for (auto &pair : labels)
+        come_from_bb[pair.first] = pair.second;
+
+    // Codegen Phi nodes: add the incoming (value, block) pairs for every phi queued in ctx.PhiNodes
+    std::map<BasicBlock *, BasicBlock*> BB_rewrite_map; // original predecessor -> block that replaced it as predecessor of a phi block
+    for (auto &tup : ctx.PhiNodes) {
+        jl_cgval_t phi_result;
+        PHINode *VN; // value phi; NULL when only a type-index phi was created (all-unboxed union)
+        jl_value_t *r;
+        std::tie(phi_result, VN, r) = tup;
+        jl_value_t *phiType = phi_result.typ;
+        jl_array_t *edges = (jl_array_t*)jl_fieldref_noalloc(r, 0);
+        jl_array_t *values = (jl_array_t*)jl_fieldref_noalloc(r, 1);
+        Value *PhiAlloca = NULL;
+        if (phi_result.V && isa<SelectInst>(phi_result.V)) { // union-split phi: operand 2 of the select is the stack slot
+            PhiAlloca = cast<SelectInst>(phi_result.V)->getOperand(2)->stripPointerCasts();
+        }
+        PHINode *TindexN = cast_or_null<PHINode>(phi_result.TIndex);
+        BasicBlock *PhiBB = VN ? VN->getParent() : TindexN->getParent();
+        for (size_t i = 0; i < jl_array_len(edges); ++i) {
+            size_t edge = jl_unbox_long(jl_array_ptr_ref(edges, i));
+            jl_value_t *value = jl_array_ptr_ref(values, i);
+            Value *V = NULL;
+            BasicBlock *IncomingBB = come_from_bb[edge];
+            BasicBlock *FromBB = IncomingBB;
+            if (BB_rewrite_map.count(FromBB)) {
+                FromBB = BB_rewrite_map[IncomingBB];
+            }
+#ifndef JL_NDEBUG
+            bool found_pred = false;
+            for (BasicBlock *pred : predecessors(PhiBB)) {
+                found_pred = pred == FromBB;
+                if (found_pred)
+                    break;
+            }
+            assert(found_pred);
+#endif
+            ctx.builder.SetInsertPoint(FromBB->getTerminator());
+            jl_cgval_t val;
+            if (!value || jl_is_ssavalue(value)) {
+                ssize_t idx = value ? ((jl_ssavalue_t*)value)->id : 0;
+                if (!value || !ctx.ssavalue_assigned.at(idx)) { // the incoming value is undefined on this edge
+                    Value *RTindex = TindexN ? UndefValue::get(T_int8) : NULL;
+                    if (VN) { // otherwise, it's all-unboxed
+                        Value *undef;
+                        if (isa<PointerType>(VN->getType())) {
+                            bool isboxed;
+                            Type *lphity = julia_type_to_llvm(phiType, &isboxed);
+                            if (!isboxed) {
+                                // the emit_phinode_assign emitted a memcpy in this case,
+                                // so this needs to ensure the pointer is valid, while the contents are undef
+                                undef = decay_derived(emit_static_alloca(ctx, lphity));
+                            }
+                            else {
+                                // but make sure gc pointers (including ptr_phi of union-split) are NULL
+                                undef = ConstantPointerNull::get(cast<PointerType>(VN->getType()));
+                                if (TindexN) // let the runtime / optimizer know this is unknown / boxed / null, so that it won't try to union_move / copy it later
+                                    RTindex = ConstantInt::get(T_int8, 0x80);
+                            }
+                        }
+                        else {
+                            undef = UndefValue::get(VN->getType());
+                        }
+                        VN->addIncoming(undef, FromBB);
+                    }
+                    if (TindexN)
+                        TindexN->addIncoming(RTindex, FromBB);
+                    continue;
+                }
+                val = ctx.SAvalues.at(idx);
+            }
+            else {
+                val = emit_expr(ctx, value);
+            }
+            if (val.constant)
+                val = mark_julia_const(val.constant); // be over-conservative at making sure `.typ` is set concretely, not tindex
+            TerminatorInst *terminator = FromBB->getTerminator();
+            if (!isa<BranchInst>(terminator) || cast<BranchInst>(terminator)->isConditional()) {
+                bool found = false;
+                for (size_t succ = 0; succ < terminator->getNumSuccessors(); ++succ) { // `succ`, not `i`: do not shadow the edge index
+                    if (terminator->getSuccessor(succ) == PhiBB) {
+                        // Can't use `llvm::SplitCriticalEdge` here because
+                        // we may have invalid phi nodes in the destination.
+                        BasicBlock *NewBB = BasicBlock::Create(terminator->getContext(),
+                           FromBB->getName() + "." + PhiBB->getName() + "_crit_edge");
+                        terminator->setSuccessor(succ, NewBB);
+                        Function::iterator FBBI = FromBB->getIterator();
+                        ctx.f->getBasicBlockList().insert(++FBBI, NewBB);
+                        ctx.builder.SetInsertPoint(NewBB);
+                        found = true;
+                        break;
+                    }
+                }
+                assert(found);
+            }
+            else { // unconditional branch: drop it, emit the conversion code, and re-create the branch below
+                terminator->eraseFromParent();
+                ctx.builder.SetInsertPoint(FromBB);
+            }
+            if (!jl_is_uniontype(phiType)) {
+                if (val.typ == (jl_value_t*)jl_bottom_type) {
+                    V = UndefValue::get(VN->getType());
+                }
+                else if (VN->getType() == T_prjlvalue) {
+                    V = boxed(ctx, val);
+                }
+                else if (VN->getType()->isPointerTy()) {
+                    V = maybe_bitcast(ctx,
+                            decay_derived(data_pointer(ctx, val)),
+                            VN->getType());
+                }
+                else {
+                    V = emit_unbox(ctx, VN->getType(), val, val.typ);
+                }
+                VN->addIncoming(V, ctx.builder.GetInsertBlock());
+                assert(!TindexN);
+            }
+            else if (!TindexN) {
+                VN->addIncoming(boxed(ctx, val), ctx.builder.GetInsertBlock());
+            }
+            else { // union-split phi: VN may be NULL here (all-unboxed union), TindexN is not
+                Value *RTindex = NULL;
+                if (val.typ == (jl_value_t*)jl_bottom_type) {
+                    V = VN ? UndefValue::get(VN->getType()) : NULL; // guard: VN is NULL for an all-unboxed union
+                    RTindex = UndefValue::get(T_int8);
+                }
+                else if (jl_is_concrete_type(val.typ) || val.constant) {
+                    size_t tindex = get_box_tindex((jl_datatype_t*)val.typ, phiType);
+                    if (tindex == 0) {
+                        V = boxed(ctx, val);
+                        RTindex = ConstantInt::get(T_int8, 0x80);
+                    }
+                    else {
+                        V = ConstantPointerNull::get(cast<PointerType>(T_prjlvalue));
+                        Type *lty = julia_type_to_llvm(val.typ);
+                        if (PhiAlloca && !type_is_ghost(lty)) // basically, if !ghost union
+                            emit_unbox(ctx, lty, val, val.typ, PhiAlloca);
+                        RTindex = ConstantInt::get(T_int8, tindex);
+                    }
+                }
+                else {
+                    jl_cgval_t new_union = convert_julia_type(ctx, val, phiType);
+                    RTindex = new_union.TIndex;
+                    if (!RTindex) {
+                        assert(new_union.isboxed && new_union.Vboxed && "convert_julia_type failed");
+                        RTindex = compute_tindex_unboxed(ctx, new_union, phiType);
+                        RTindex = ctx.builder.CreateOr(RTindex, ConstantInt::get(T_int8, 0x80));
+                        new_union.TIndex = RTindex;
+                    }
+                    V = new_union.Vboxed ? new_union.Vboxed : ConstantPointerNull::get(cast<PointerType>(T_prjlvalue));
+                    if (PhiAlloca) { // basically, if !ghost union
+                        Value *skip = NULL;
+                        if (new_union.Vboxed != nullptr)
+                            skip = ctx.builder.CreateICmpNE( // if 0x80 is set, we won't select this slot anyways
+                                    ctx.builder.CreateAnd(RTindex, ConstantInt::get(T_int8, 0x80)),
+                                    ConstantInt::get(T_int8, 0));
+                        emit_unionmove(ctx, PhiAlloca, new_union, skip, false, NULL);
+                    }
+                }
+                if (VN)
+                    VN->addIncoming(V, ctx.builder.GetInsertBlock());
+                if (TindexN)
+                    TindexN->addIncoming(RTindex, ctx.builder.GetInsertBlock());
+            }
+            ctx.builder.CreateBr(PhiBB);
+            // Check any phi nodes in the Phi block to see if by splitting the edges,
+            // we made things inconsistent
+            if (FromBB != ctx.builder.GetInsertBlock()) {
+                BB_rewrite_map[IncomingBB] = ctx.builder.GetInsertBlock();
+                for (BasicBlock::iterator I = PhiBB->begin(); isa<PHINode>(I); ++I) {
+                    PHINode *PN = cast<PHINode>(I);
+                    ssize_t BBIdx = PN->getBasicBlockIndex(FromBB);
+                    if (BBIdx == -1)
+                        continue;
+                    PN->setIncomingBlock(BBIdx, ctx.builder.GetInsertBlock());
+                }
+            }
+        }
+        // Julia PHINodes may be incomplete with respect to predecessors, LLVM's may not
+        for (auto *pred : predecessors(PhiBB)) {
+            PHINode *PhiN = VN ? VN : TindexN;
+            bool found = false;
+            for (size_t i = 0; i < PhiN->getNumIncomingValues(); ++i) {
+                found = pred == PhiN->getIncomingBlock(i);
+                if (found)
+                    break;
+            }
+            if (!found) { // predecessor not listed by the Julia PhiNode: supply a placeholder incoming value
+                if (VN) {
+                    Value *undef = VN->getType() == T_prjlvalue ?
+                        (llvm::Value*)ConstantPointerNull::get(cast<PointerType>(T_prjlvalue)) :
+                        (llvm::Value*)UndefValue::get(VN->getType());
+                    VN->addIncoming(undef, pred);
+                }
+                if (TindexN) {
+                    TindexN->addIncoming(UndefValue::get(TindexN->getType()), pred);
+                }
+            }
+        }
+    }
+
     // step 13. Perform any delayed instantiations
     if (ctx.debug_enabled) {
         dbuilder.finalize();
diff --git a/src/dump.c b/src/dump.c
index 71416cfb52de8..49479c28f48ab 100644
--- a/src/dump.c
+++ b/src/dump.c
@@ -2844,10 +2844,7 @@ void jl_init_serializer(void)
                      jl_box_int32(21), jl_box_int32(22), jl_box_int32(23),
                      jl_box_int32(24), jl_box_int32(25), jl_box_int32(26),
                      jl_box_int32(27), jl_box_int32(28), jl_box_int32(29),
-                     jl_box_int32(30), jl_box_int32(31), jl_box_int32(32),
-#ifndef _P64
-                     jl_box_int32(33), jl_box_int32(34), jl_box_int32(35),
-#endif
+                     jl_box_int32(30), jl_box_int32(31),
                      jl_box_int64(0), jl_box_int64(1), jl_box_int64(2),
                      jl_box_int64(3), jl_box_int64(4), jl_box_int64(5),
                      jl_box_int64(6), jl_box_int64(7), jl_box_int64(8),
@@ -2858,12 +2855,10 @@ void jl_init_serializer(void)
                      jl_box_int64(21), jl_box_int64(22), jl_box_int64(23),
                      jl_box_int64(24), jl_box_int64(25), jl_box_int64(26),
                      jl_box_int64(27), jl_box_int64(28), jl_box_int64(29),
-                     jl_box_int64(30), jl_box_int64(31), jl_box_int64(32),
-#ifdef _P64
-                     jl_box_int64(33), jl_box_int64(34), jl_box_int64(35),
-#endif
+                     jl_box_int64(30), jl_box_int64(31),
                      jl_labelnode_type, jl_linenumbernode_type, jl_gotonode_type,
-                     jl_quotenode_type, jl_type_type, jl_bottom_type, jl_ref_type,
+                     jl_quotenode_type, jl_pinode_type, jl_phinode_type,
+                     jl_type_type, jl_bottom_type, jl_ref_type,
                      jl_pointer_type, jl_vararg_type, jl_abstractarray_type, jl_void_type,
                      jl_densearray_type, jl_function_type, jl_unionall_type, jl_typename_type,
                      jl_builtin_type, jl_task_type, jl_uniontype_type, jl_typetype_type,
diff --git a/src/gf.c b/src/gf.c
index 912cf1b863aca..2ee9be971d12c 100644
--- a/src/gf.c
+++ b/src/gf.c
@@ -239,6 +239,9 @@ jl_code_info_t *jl_type_infer(jl_method_instance_t **pli, size_t world, int forc
     JL_TIMING(INFERENCE);
     if (jl_typeinf_func == NULL)
         return NULL;
+    static int in_inference;
+    if (in_inference > 2)
+        return NULL;
 #ifdef ENABLE_INFERENCE
     jl_method_instance_t *li = *pli;
     if (li->inInference && !force)
@@ -260,8 +263,10 @@ jl_code_info_t *jl_type_infer(jl_method_instance_t **pli, size_t world, int forc
     size_t last_age = ptls->world_age;
     ptls->world_age = jl_typeinf_world;
     li->inInference = 1;
+    in_inference++;
     jl_svec_t *linfo_src_rettype = (jl_svec_t*)jl_apply_with_saved_exception_state(fargs, 3, 0);
     ptls->world_age = last_age;
+    in_inference--;
     li->inInference = 0;
 
     jl_code_info_t *src = NULL;
diff --git a/src/interpreter-stacktrace.c b/src/interpreter-stacktrace.c
index ee37bfdc9acfc..6cbe5d2be805b 100644
--- a/src/interpreter-stacktrace.c
+++ b/src/interpreter-stacktrace.c
@@ -71,7 +71,7 @@ uintptr_t __stop_jl_interpreter_frame = (uintptr_t)&__stop_jl_interpreter_frame_
 //               sizeof(struct interpreter_state). Additionally, make sure that
 //               MAX_INTERP_STATE_SIZE+STACK_PADDING+8 is a multiple of 16 to
 //               ensure the proper stack alignment.
-#define MAX_INTERP_STATE_SIZE 56
+#define MAX_INTERP_STATE_SIZE 72
 #define STACK_PADDING 0
 
 static_assert(sizeof(interpreter_state) <= MAX_INTERP_STATE_SIZE, "Stack layout invariants violated.");
diff --git a/src/interpreter.c b/src/interpreter.c
index c3da245e2d79d..31e6865732719 100644
--- a/src/interpreter.c
+++ b/src/interpreter.c
@@ -20,6 +20,7 @@ typedef struct {
     jl_module_t *module; // context for globals
     jl_value_t **locals; // slots for holding local slots and ssavalues
     jl_svec_t *sparam_vals; // method static parameters, if eval-ing a method body
+    size_t last_branch; // Points at the last branch statement (for evaluating phi nodes)
     size_t ip; // Points to the currently-evaluating statement
     int preevaluation; // use special rules for pre-evaluating expressions
     int continue_at; // statement index to jump to after leaving exception handler (0 if none)
@@ -429,6 +430,14 @@ SECT_INTERP static jl_value_t *eval_value(jl_value_t *e, interpreter_state *s)
         }
         return defined ? jl_true : jl_false;
     }
+    else if (head == throw_undef_if_not_sym) {
+        jl_value_t *cond = eval_value(args[1], s);
+        assert(jl_is_bool(cond));
+        if (cond == jl_false) {
+            jl_undefined_var_error((jl_sym_t*)args[0]);
+        }
+        return jl_nothing;
+    }
     else if (head == new_sym) {
         jl_value_t *thetype = eval_value(args[0], s);
         jl_value_t *v=NULL, *fldv=NULL;
@@ -496,17 +505,45 @@ SECT_INTERP static jl_value_t *eval_body(jl_array_t *stmts, interpreter_state *s
             jl_get_ptls_states()->world_age = jl_world_counter;
         jl_value_t *stmt = jl_array_ptr_ref(stmts, s->ip);
         if (jl_is_gotonode(stmt)) {
+            s->last_branch = s->ip;
             s->ip = jl_gotonode_label(stmt) - 1;
             continue;
         }
+        else if (jl_is_pinode(stmt)) {
+            jl_value_t *val = eval_value(jl_fieldref_noalloc(stmt, 0), s);
+#ifndef JL_NDEBUG
+            jl_typeassert(val, jl_fieldref_noalloc(stmt, 1));
+#endif
+            return val;
+        }
         else if (jl_is_expr(stmt)) {
+            // Most exprs are allowed to end a BB by fall through
+            s->last_branch = s->ip;
             jl_sym_t *head = ((jl_expr_t*)stmt)->head;
             if (head == return_sym) {
                 return eval_value(jl_exprarg(stmt, 0), s);
             }
             else if (head == assign_sym) {
                 jl_value_t *sym = jl_exprarg(stmt, 0);
-                jl_value_t *rhs = eval_value(jl_exprarg(stmt, 1), s);
+                jl_value_t *rhs = NULL;
+                if (jl_is_phinode(jl_exprarg(stmt, 1))) {
+                    jl_array_t *edges = (jl_array_t*)jl_fieldref_noalloc(jl_exprarg(stmt, 1), 0);
+                    ssize_t edge = -1;
+                    for (int i = 0; i < jl_array_len(edges); ++i) {
+                        size_t from = jl_unbox_long(jl_arrayref(edges, i));
+                        if (from == s->last_branch) {
+                            edge = i;
+                            break;
+                        }
+                    }
+                    if (edge == -1) {
+                        jl_error("PhiNode edges do not contain last branch");
+                    }
+                    jl_value_t *val = jl_arrayref((jl_array_t*)jl_fieldref_noalloc(jl_exprarg(stmt, 1), 1), edge);
+                    rhs = eval_value(val, s);
+                } else {
+                    rhs = eval_value(jl_exprarg(stmt, 1), s);
+                }
                 if (jl_is_ssavalue(sym)) {
                     ssize_t genid = ((jl_ssavalue_t*)sym)->id;
                     if (genid >= jl_source_nssavalues(s->src) || genid < 0)
@@ -609,6 +646,7 @@ SECT_INTERP static jl_value_t *eval_body(jl_array_t *stmts, interpreter_state *s
             }
         }
         else if (jl_is_newvarnode(stmt)) {
+            s->last_branch = s->ip;
             jl_value_t *var = jl_fieldref(stmt, 0);
             assert(jl_is_slot(var));
             ssize_t n = jl_slot_number(var);
diff --git a/src/intrinsics.cpp b/src/intrinsics.cpp
index ecbf1fb53aa39..ca7e0f45db92b 100644
--- a/src/intrinsics.cpp
+++ b/src/intrinsics.cpp
@@ -356,6 +356,9 @@ static Value *emit_unbox(jl_codectx_t &ctx, Type *to, const jl_cgval_t &x, jl_va
     if (unboxed) {
         if (!dest)
             return unboxed;
+        Type *dest_ty = unboxed->getType()->getPointerTo();
+        if (dest->getType() != dest_ty)
+            dest = emit_bitcast(ctx, dest, dest_ty);
         ctx.builder.CreateStore(unboxed, dest);
         return NULL;
     }
diff --git a/src/jltypes.c b/src/jltypes.c
index 8d790c675b0e6..98161740ffa21 100644
--- a/src/jltypes.c
+++ b/src/jltypes.c
@@ -90,6 +90,8 @@ jl_datatype_t *jl_globalref_type;
 jl_datatype_t *jl_linenumbernode_type;
 jl_datatype_t *jl_labelnode_type;
 jl_datatype_t *jl_gotonode_type;
+jl_datatype_t *jl_pinode_type;
+jl_datatype_t *jl_phinode_type;
 jl_datatype_t *jl_quotenode_type;
 jl_datatype_t *jl_newvarnode_type;
 jl_datatype_t *jl_intrinsic_type;
@@ -1976,6 +1978,16 @@ void jl_init_types(void)
                         jl_perm_symsvec(1, "label"),
                         jl_svec(1, jl_long_type), 0, 0, 1);
 
+    jl_pinode_type =
+        jl_new_datatype(jl_symbol("PiNode"), core, jl_any_type, jl_emptysvec,
+                        jl_perm_symsvec(2, "val", "typ"),
+                        jl_svec(2, jl_any_type, jl_any_type), 0, 0, 2);
+
+    jl_phinode_type =
+        jl_new_datatype(jl_symbol("PhiNode"), core, jl_any_type, jl_emptysvec,
+                        jl_perm_symsvec(2, "edges", "values"),
+                        jl_svec(2, jl_array_any_type, jl_array_any_type), 0, 0, 2);
+
     jl_quotenode_type =
         jl_new_datatype(jl_symbol("QuoteNode"), core, jl_any_type, jl_emptysvec,
                         jl_perm_symsvec(1, "value"),
@@ -2198,6 +2210,8 @@ void jl_init_types(void)
     jl_compute_field_offsets(jl_labelnode_type);
     jl_compute_field_offsets(jl_gotonode_type);
     jl_compute_field_offsets(jl_quotenode_type);
+    jl_compute_field_offsets(jl_pinode_type);
+    jl_compute_field_offsets(jl_phinode_type);
     jl_compute_field_offsets(jl_module_type);
     jl_compute_field_offsets(jl_method_instance_type);
     jl_compute_field_offsets(jl_unionall_type);
diff --git a/src/julia.h b/src/julia.h
index e174ee6fb1751..de449173e0100 100644
--- a/src/julia.h
+++ b/src/julia.h
@@ -598,6 +598,8 @@ extern JL_DLLEXPORT jl_datatype_t *jl_globalref_type JL_GLOBALLY_ROOTED;
 extern JL_DLLEXPORT jl_datatype_t *jl_linenumbernode_type JL_GLOBALLY_ROOTED;
 extern JL_DLLEXPORT jl_datatype_t *jl_labelnode_type JL_GLOBALLY_ROOTED;
 extern JL_DLLEXPORT jl_datatype_t *jl_gotonode_type JL_GLOBALLY_ROOTED;
+extern JL_DLLEXPORT jl_datatype_t *jl_phinode_type JL_GLOBALLY_ROOTED;
+extern JL_DLLEXPORT jl_datatype_t *jl_pinode_type JL_GLOBALLY_ROOTED;
 extern JL_DLLEXPORT jl_datatype_t *jl_quotenode_type JL_GLOBALLY_ROOTED;
 extern JL_DLLEXPORT jl_datatype_t *jl_newvarnode_type JL_GLOBALLY_ROOTED;
 extern JL_DLLEXPORT jl_datatype_t *jl_intrinsic_type JL_GLOBALLY_ROOTED;
@@ -940,6 +942,8 @@ static inline int jl_is_layout_opaque(const jl_datatype_layout_t *l) JL_NOTSAFEP
 #define jl_is_globalref(v)   jl_typeis(v,jl_globalref_type)
 #define jl_is_labelnode(v)   jl_typeis(v,jl_labelnode_type)
 #define jl_is_gotonode(v)    jl_typeis(v,jl_gotonode_type)
+#define jl_is_pinode(v)      jl_typeis(v,jl_pinode_type)
+#define jl_is_phinode(v)     jl_typeis(v,jl_phinode_type)
 #define jl_is_quotenode(v)   jl_typeis(v,jl_quotenode_type)
 #define jl_is_newvarnode(v)  jl_typeis(v,jl_newvarnode_type)
 #define jl_is_linenode(v)    jl_typeis(v,jl_linenumbernode_type)
diff --git a/src/julia_internal.h b/src/julia_internal.h
index 283d87c80ce47..ec4de625ddb16 100644
--- a/src/julia_internal.h
+++ b/src/julia_internal.h
@@ -1031,6 +1031,7 @@ extern jl_sym_t *boundscheck_sym;
 extern jl_sym_t *gc_preserve_begin_sym; extern jl_sym_t *gc_preserve_end_sym;
 extern jl_sym_t *generated_sym;
 extern jl_sym_t *generated_only_sym;
+extern jl_sym_t *throw_undef_if_not_sym;
 
 struct _jl_sysimg_fptrs_t;
 
diff --git a/src/rtutils.c b/src/rtutils.c
index b67ca628e0c53..22a1e8b1fa5ad 100644
--- a/src/rtutils.c
+++ b/src/rtutils.c
@@ -821,8 +821,16 @@ static size_t jl_static_show_x_(JL_STREAM *out, jl_value_t *v, jl_datatype_t *vt
         }
     }
     else if (jl_is_array_type(vt)) {
-        n += jl_static_show_x(out, (jl_value_t*)vt, depth);
-        n += jl_printf(out, "[");
+        n += jl_printf(out, "Array{");
+        n += jl_static_show_x(out, (jl_value_t*)jl_tparam0(vt), depth);
+        n += jl_printf(out, ", (");
+        size_t i, ndims = jl_array_ndims(v);
+        if (ndims == 1)
+            n += jl_printf(out, "%" PRIdPTR ",", jl_array_dim0(v));
+        else
+            for (i = 0; i < ndims; i++)
+                n += jl_printf(out, (i > 0 ? ", %" PRIdPTR : "%" PRIdPTR), jl_array_dim(v, i));
+        n += jl_printf(out, ")}[");
         size_t j, tlen = jl_array_len(v);
         jl_array_t *av = (jl_array_t*)v;
         jl_datatype_t *el_type = (jl_datatype_t*)jl_tparam0(vt);
diff --git a/src/staticdata.c b/src/staticdata.c
index 790d19cf50dc5..50db4912dde53 100644
--- a/src/staticdata.c
+++ b/src/staticdata.c
@@ -1572,6 +1572,7 @@ static void jl_init_serializer2(int for_serialize)
                      lambda_sym, jl_symbol("tuple"), assign_sym,
                      jl_labelnode_type, jl_linenumbernode_type,
                      jl_gotonode_type, jl_quotenode_type,
+                     jl_pinode_type, jl_phinode_type,
                      jl_type_type, jl_bottom_type, jl_ref_type, jl_pointer_type,
                      jl_vararg_type, jl_abstractarray_type,
                      jl_densearray_type, jl_void_type, jl_function_type, jl_typeofbottom_type,
@@ -1596,6 +1597,7 @@ static void jl_init_serializer2(int for_serialize)
                      jl_unionall_type->name, jl_intrinsic_type->name, jl_task_type->name,
                      jl_labelnode_type->name, jl_linenumbernode_type->name, jl_builtin_type->name,
                      jl_gotonode_type->name, jl_quotenode_type->name,
+                     jl_pinode_type->name, jl_phinode_type->name,
                      jl_globalref_type->name, jl_typeofbottom_type->name,
                      jl_string_type->name, jl_abstractstring_type->name,
                      jl_namedtuple_type, jl_namedtuple_typename,
diff --git a/test/core.jl b/test/core.jl
index 0f56aaa36b611..d2adb82e7f500 100644
--- a/test/core.jl
+++ b/test/core.jl
@@ -526,11 +526,11 @@ function f21900()
         x = 0
     end
     global f21900_cnt += 1
-    x
+    x # should be global
     global f21900_cnt += -1000
     nothing
 end
-@test_throws UndefVarError f21900()
+@test_throws UndefVarError(:x) f21900()
 @test f21900_cnt == 1
 
 # use @eval so this runs as a toplevel scope block
@@ -3857,14 +3857,14 @@ foo9677(x::Array) = invoke(foo9677, Tuple{AbstractArray}, x)
 
 # issue #6846
 f6846() = (please6846; 2)
-@test_throws UndefVarError f6846()
+@test_throws UndefVarError(:please6846) f6846()
 
 module M6846
     macro f()
-        return :(please6846; 2)
+        return esc(:(please6846; 2))
     end
 end
-@test_throws UndefVarError @M6846.f()
+@test_throws UndefVarError(:please6846) @M6846.f()
 
 # issue #14758
 @test isa(@eval(f14758(; $([]...)) = ()), Function)
@@ -4580,10 +4580,10 @@ end
 B14878(ng) = B14878()
 function trigger14878()
     w = A14878()
-    w.ext[:14878] = B14878(junk)  # junk not defined!
+    w.ext[:14878] = B14878(junk)  # global junk not defined!
     return w
 end
-@test_throws UndefVarError trigger14878()
+@test_throws UndefVarError(:junk) trigger14878()
 
 # issue #1090
 function f1090(x)::Int
diff --git a/test/inline.jl b/test/inline.jl
index 4cc35d55e439f..8475e3d90acf1 100644
--- a/test/inline.jl
+++ b/test/inline.jl
@@ -67,7 +67,7 @@ function bar12620()
         foo_inl(i==1)
     end
 end
-@test_throws UndefVarError bar12620()
+@test_throws UndefVarError(:y) bar12620()
 
 # issue #16165
 @inline f16165(x) = (x = UInt(x) + 1)

From 20e02b81cdf871c9269320b811f8bc60c183ad3d Mon Sep 17 00:00:00 2001
From: Jameson Nash <vtjnash@gmail.com>
Date: Mon, 26 Feb 2018 11:17:33 -0500
Subject: [PATCH 2/2] [NewOptimizer] Track inlining info

---
 base/compiler/optimize.jl       | 58 +++++++++++++++++---------
 base/compiler/ssair/driver.jl   | 64 +++++++++++++++++++++++-----
 base/compiler/ssair/ir.jl       | 12 +++---
 base/compiler/ssair/legacy.jl   | 74 +++++++++++++++++++++++++++++++--
 base/compiler/ssair/slot2ssa.jl |  4 +-
 test/compiler/compiler.jl       | 28 ++++++-------
 6 files changed, 186 insertions(+), 54 deletions(-)

diff --git a/base/compiler/optimize.jl b/base/compiler/optimize.jl
index 77187344b106e..5002fea33738b 100644
--- a/base/compiler/optimize.jl
+++ b/base/compiler/optimize.jl
@@ -23,10 +23,11 @@ mutable struct OptimizationState
             s_edges = []
             frame.stmt_edges[1] = s_edges
         end
-        next_label = max(label_counter(frame.src.code), length(frame.src.code)) + 10
+        src = frame.src
+        next_label = max(label_counter(src.code), length(src.code)) + 10
         return new(frame.linfo, frame.vararg_type_container,
                    s_edges::Vector{Any},
-                   frame.src, frame.mod, frame.nargs,
+                   src, frame.mod, frame.nargs,
                    next_label, frame.min_valid, frame.max_valid,
                    frame.params)
     end
@@ -53,7 +54,7 @@ mutable struct OptimizationState
             inmodule = linfo.def::Module
             nargs = 0
         end
-        next_label = max(label_counter(frame.src.code), length(frame.src.code)) + 10
+        next_label = max(label_counter(src.code), length(src.code)) + 10
         vararg_type_container = nothing # if you want something more accurate, set it yourself :P
         return new(linfo, vararg_type_container,
                    s_edges::Vector{Any},
@@ -262,7 +263,7 @@ function isinlineable(m::Method, src::CodeInfo, mod::Module, params::Params, bon
     return inlineable
 end
 
-const enable_new_optimizer = RefValue{Bool}(false)
+const enable_new_optimizer = RefValue(false)
 
 # converge the optimization work
 function optimize(me::InferenceState)
@@ -294,12 +295,13 @@ function optimize(me::InferenceState)
             reindex_labels!(opt)
             nargs = Int(opt.nargs) - 1
             if def isa Method
-                topline = LineNumberNode(Int(def.line), def.file)
+                topline = LineInfoNode(opt.mod, def.name, def.file, def.line, 0)
             else
-                topline = LineNumberNode(0)
+                topline = LineInfoNode(opt.mod, NullLineInfo.name, NullLineInfo.file, 0, 0)
             end
-            ir = run_passes(opt.src, opt.mod, nargs, topline)
-            replace_code!(opt.src, ir, nargs, topline)
+            linetable = [topline]
+            ir = run_passes(opt.src, nargs, linetable)
+            replace_code!(opt.src, ir, nargs, linetable)
             push!(opt.src.code, LabelNode(length(opt.src.code) + 1))
             any_phi = true
         elseif !any_phi
@@ -1474,6 +1476,7 @@ function inlineable(@nospecialize(f), @nospecialize(ft), e::Expr, atypes::Vector
         end
     end
     local end_label # if it ends in a goto, we might need to add a come-from label
+    npops = 0 # push_loc/pop_loc metas in the body need not balance, so count how many pops must be added
     for i = 1:body_len
         a = body.args[i]
         if isa(a, GotoNode)
@@ -1496,6 +1499,27 @@ function inlineable(@nospecialize(f), @nospecialize(ft), e::Expr, atypes::Vector
                 end
                 edges = Any[newlabels[edge::Int + 1] - 1 for edge in edges]
                 a.args[2] = PhiNode(edges, a.args[2].values)
+            elseif a.head === :meta && length(a.args) > 0
+                a1 = a.args[1]
+                if a1 === :push_loc
+                    npops += 1
+                elseif a1 === :pop_loc
+                    if length(a.args) > 1
+                        npops_loc = a.args[2]::Int
+                        if npops_loc > npops # corrupt IR - try to normalize it to limit the impact
+                            a.args[2] = npops
+                            npops = 0
+                        else
+                            npops -= npops_loc
+                        end
+                    else
+                        if npops == 0 # corrupt IR - try to normalize it to limit the impact
+                            body.args[i] = nothing
+                        else
+                            npops -= 1
+                        end
+                    end
+                end
             end
         end
     end
@@ -1575,27 +1599,23 @@ function inlineable(@nospecialize(f), @nospecialize(ft), e::Expr, atypes::Vector
             isa(linenode.file, Symbol) && (file = linenode.file)
         end
     end
-    if do_coverage
+    npops += 1
+    if do_coverage || !isempty(stmts)
+        pop_loc = (npops == 1) ? Expr(:meta, :pop_loc) : Expr(:meta, :pop_loc, npops)
         # Check if we are switching module, which is necessary to catch user
         # code inlined into `Base` with `--code-coverage=user`.
-        # Assume we are inlining directly into `enclosing` instead of another
-        # function inlined in it
         mod = method.module
-        if mod === sv.mod
+        if !do_coverage || mod === sv.mod
             pushfirst!(stmts, Expr(:meta, :push_loc, file,
                                  method.name, line))
         else
             pushfirst!(stmts, Expr(:meta, :push_loc, file,
                                  method.name, line, mod))
         end
-        push!(stmts, Expr(:meta, :pop_loc))
-    elseif !isempty(stmts)
-        pushfirst!(stmts, Expr(:meta, :push_loc, file,
-                             method.name, line))
-        if isa(stmts[end], LineNumberNode)
-            stmts[end] = Expr(:meta, :pop_loc)
+        if !do_coverage && !isempty(stmts) && isa(stmts[end], LineNumberNode)
+            stmts[end] = pop_loc
         else
-            push!(stmts, Expr(:meta, :pop_loc))
+            push!(stmts, pop_loc)
         end
     end
 
diff --git a/base/compiler/ssair/driver.jl b/base/compiler/ssair/driver.jl
index 998b356fb9d84..2d745829c3b3f 100644
--- a/base/compiler/ssair/driver.jl
+++ b/base/compiler/ssair/driver.jl
@@ -1,3 +1,12 @@
+struct LineInfoNode
+    mod::Module
+    method::Symbol
+    file::Symbol
+    line::Int
+    inlined_at::Int
+end
+const NullLineInfo = LineInfoNode(@__MODULE__, Symbol(""), Symbol(""), 0, 0)
+
 include("compiler/ssair/ir.jl")
 include("compiler/ssair/domtree.jl")
 include("compiler/ssair/slot2ssa.jl")
@@ -10,19 +19,41 @@ macro show(s)
     # return :(println($(QuoteNode(s)), " = ", $(esc(s))))
 end
 
-function normalize(@nospecialize(stmt), meta::Vector{Any}, inline::Vector{Any}, loc::RefValue{LineNumberNode})
+function normalize(@nospecialize(stmt), meta::Vector{Any}, table::Vector{LineInfoNode}, loc::RefValue{Int})
     if isa(stmt, Expr)
         if stmt.head == :meta
             args = stmt.args
             if length(args) > 0
                 a1 = args[1]
                 if a1 === :push_loc
-                    push!(inline, stmt)
+                    let
+                        current = loc[]
+                        filename = args[2]::Symbol
+                        methodname = NullLineInfo.method
+                        mod = table[current].mod
+                        line = 0
+                        for i = 3:length(args)
+                            ai = args[i]
+                            if ai isa Symbol
+                                methodname = ai
+                            elseif ai isa Int32
+                                line = Int(ai)
+                            elseif ai isa Int64
+                                line = Int(ai)
+                            elseif ai isa Module
+                                mod = ai
+                            end
+                        end
+                        push!(table, LineInfoNode(mod, methodname, filename, line, current))
+                        loc[] = length(table)
+                    end
                 elseif a1 === :pop_loc
                     n = (length(args) > 1) ? args[2]::Int : 1
                     for i in 1:n
-                        isempty(inline) && break
-                        pop!(inline)
+                        current = loc[]
+                        current = table[current].inlined_at
+                        current == 0 && break
+                        loc[] = current
                     end
                 else
                     push!(meta, stmt)
@@ -39,22 +70,35 @@ function normalize(@nospecialize(stmt), meta::Vector{Any}, inline::Vector{Any},
     elseif isa(stmt, LabelNode)
         return nothing
     elseif isa(stmt, LineNumberNode)
-        loc[] = stmt
+        let # need to expand this node so that it is source-location independent
+            current = loc[]
+            info = table[current]
+            methodname = info.method
+            mod = info.mod
+            file = stmt.file
+            file isa Symbol || (file = info.file)
+            line = stmt.line
+            push!(table, LineInfoNode(mod, methodname, file, line, info.inlined_at))
+            loc[] = length(table)
+        end
         return nothing
     end
     return stmt
 end
 
-function run_passes(ci::CodeInfo, mod::Module, nargs::Int, toploc::LineNumberNode)
+function run_passes(ci::CodeInfo, nargs::Int, linetable::Vector{LineInfoNode})
+    mod = linetable[1].mod
     ci.code = copy(ci.code)
     meta = Any[]
-    lines = fill(LineNumberNode(0), length(ci.code))
-    let inline = Any[], loc = RefValue(toploc)
+    lines = fill(0, length(ci.code))
+    let loc = RefValue(1)
         for i = 1:length(ci.code)
             stmt = ci.code[i]
-            stmt = normalize(stmt, meta, inline, loc)
+            stmt = normalize(stmt, meta, linetable, loc)
             ci.code[i] = stmt
-            stmt === nothing || (lines[i] = loc[])
+            if !(stmt === nothing)
+                lines[i] = loc[]
+            end
         end
     end
     ci.code = strip_trailing_junk!(ci.code, lines)
diff --git a/base/compiler/ssair/ir.jl b/base/compiler/ssair/ir.jl
index c09bf9067c28b..8729d6007d2d5 100644
--- a/base/compiler/ssair/ir.jl
+++ b/base/compiler/ssair/ir.jl
@@ -120,22 +120,22 @@ function first_insert_for_bb(code, cfg, block)
 end
 
 
-const NewNode = Tuple{Int, Any, Any, LineNumberNode}
+const NewNode = Tuple{Int, Any, Any, #=LineNumber=#Int}
 
 struct IRCode
     stmts::Vector{Any}
     types::Vector{Any}
-    lines::Vector{LineNumberNode}
+    lines::Vector{Int}
     argtypes::Vector{Any}
     cfg::CFG
     new_nodes::Vector{NewNode}
     mod::Module
     meta::Vector{Any}
 
-    function IRCode(stmts::Vector{Any}, lines::Vector{LineNumberNode}, cfg::CFG, argtypes::Vector{Any}, mod::Module, meta::Vector{Any})
+    function IRCode(stmts::Vector{Any}, lines::Vector{Int}, cfg::CFG, argtypes::Vector{Any}, mod::Module, meta::Vector{Any})
         return new(stmts, Any[], lines, argtypes, cfg, NewNode[], mod, meta)
     end
-    function IRCode(ir::IRCode, stmts::Vector{Any}, types::Vector{Any}, lines::Vector{LineNumberNode}, cfg::CFG, new_nodes::Vector{NewNode})
+    function IRCode(ir::IRCode, stmts::Vector{Any}, types::Vector{Any}, lines::Vector{Int}, cfg::CFG, new_nodes::Vector{NewNode})
         return new(stmts, types, lines, ir.argtypes, cfg, new_nodes, ir.mod, ir.meta)
     end
 end
@@ -317,7 +317,7 @@ mutable struct IncrementalCompact
     ir::IRCode
     result::Vector{Any}
     result_types::Vector{Any}
-    result_lines::Vector{LineNumberNode}
+    result_lines::Vector{Int}
     ssa_rename::Vector{Any}
     used_ssas::Vector{Int}
     late_fixup::Vector{Int}
@@ -331,7 +331,7 @@ mutable struct IncrementalCompact
         new_len = length(code.stmts) + length(code.new_nodes)
         result = Array{Any}(uninitialized, new_len)
         result_types = Array{Any}(uninitialized, new_len)
-        result_lines = Array{LineNumberNode}(uninitialized, new_len)
+        result_lines = Array{Int}(uninitialized, new_len)
         ssa_rename = Any[SSAValue(i) for i = 1:new_len]
         used_ssas = fill(0, new_len)
         late_fixup = Vector{Int}()
diff --git a/base/compiler/ssair/legacy.jl b/base/compiler/ssair/legacy.jl
index 8e980bee1fec7..08fc8620f7b1b 100644
--- a/base/compiler/ssair/legacy.jl
+++ b/base/compiler/ssair/legacy.jl
@@ -10,7 +10,74 @@ function ssaargmap(f, @nospecialize(stmt))
     urs[]
 end
 
-function replace_code!(ci::CodeInfo, code::IRCode, nargs::Int, topline::LineNumberNode)
+function line_to_vector(line::Int, linetable::Vector{LineInfoNode})
+    lines = Int[]
+    while line != 0
+        push!(lines, line)
+        line = linetable[line].inlined_at
+    end
+    return lines
+end
+
+function push_new_lineinfo!(new_code::Vector{Any}, topline::Int, line::Int, linetable::Vector{LineInfoNode})
+    # separate the info into three sets: pops, line-change, pushes
+    do_coverage = coverage_enabled()
+    topmod = linetable[line].mod
+    toplines = line_to_vector(topline, linetable)
+    lines = line_to_vector(line, linetable)
+    while !isempty(lines) && !isempty(toplines) && lines[end] == toplines[end]
+        # remove common frames, recording changes to topmod
+        topmod = linetable[pop!(lines)].mod
+        pop!(toplines)
+    end
+    # check whether the outermost frame changed, or just the line number
+    newframe = true
+    topfile = NullLineInfo.file
+    if !isempty(lines) && !isempty(toplines)
+        let topline = linetable[toplines[end]]
+            line = linetable[lines[end]]
+            if topline.inlined_at == 0 || (topline.mod === line.mod && topline.method === line.method)
+                # we could track frame_id precisely, but LLVM / DWARF has no support for that,
+                # and it wouldn't really be that meaningful after statements moved around,
+                # so we just do fuzzy matching here in the legacy-format writer
+                newframe = false
+                topfile = topline.file
+            end
+        end
+    end
+    # first pop the old frame(s)
+    npops = length(toplines) + newframe - 1
+    if npops > 0
+        push!(new_code, (npops == 1) ? Expr(:meta, :pop_loc) : Expr(:meta, :pop_loc, npops))
+    end
+    # then change the line number
+    if !newframe
+        let line = linetable[pop!(lines)]
+            if line.file === topfile
+                loc = LineNumberNode(line.line)
+            else
+                loc = LineNumberNode(line.line, line.file)
+            end
+            push!(new_code, loc)
+            topmod = line.mod
+        end
+    end
+    # then push the new frames
+    while !isempty(lines)
+        let line = linetable[pop!(lines)]
+            if !do_coverage || line.mod == topmod
+                loc = Expr(:meta, :push_loc, line.file, line.method, line.line)
+            else
+                loc = Expr(:meta, :push_loc, line.file, line.method, line.line, line.mod)
+            end
+            push!(new_code, loc)
+            topmod = line.mod
+        end
+    end
+    nothing
+end
+
+function replace_code!(ci::CodeInfo, code::IRCode, nargs::Int, linetable::Vector{LineInfoNode})
     if !isempty(code.new_nodes)
         code = compact!(code)
     end
@@ -68,6 +135,7 @@ function replace_code!(ci::CodeInfo, code::IRCode, nargs::Int, topline::LineNumb
     label_mapping = IdDict{Int, Int}()
     terminator_mapping = IdDict{Int, Int}()
     fixup = Int[]
+    topline = 1
     for (idx, stmt) in pairs(code.stmts)
         line = code.lines[idx]
         # push labels first
@@ -78,8 +146,8 @@ function replace_code!(ci::CodeInfo, code::IRCode, nargs::Int, topline::LineNumb
             push!(new_code, LabelNode(length(new_code) + 1))
         end
         # then metadata
-        if !(line.file === nothing && line.line === 0) && !(line === topline)
-            push!(new_code, line)
+        if line != 0 && line != topline
+            push_new_lineinfo!(new_code, topline, line, linetable)
             topline = line
         end
         # record if this'll need a fixup after stmt number
diff --git a/base/compiler/ssair/slot2ssa.jl b/base/compiler/ssair/slot2ssa.jl
index eaf44dbcc3aa3..83d74d5874e6e 100644
--- a/base/compiler/ssair/slot2ssa.jl
+++ b/base/compiler/ssair/slot2ssa.jl
@@ -183,7 +183,7 @@ function rename_uses!(ir::IRCode, ci::CodeInfo, idx::Int, @nospecialize(stmt), r
     return fixemup!(stmt->true, stmt->renames[slot_id(stmt)], ir, ci, idx, stmt)
 end
 
-function strip_trailing_junk!(code::Vector{Any}, lines::Vector{LineNumberNode})
+function strip_trailing_junk!(code::Vector{Any}, lines::Vector{Int})
     # Remove `nothing`s at the end, we don't handle them well
     # (we expect the last instruction to be a terminator)
     for i = length(code):-1:1
@@ -198,7 +198,7 @@ function strip_trailing_junk!(code::Vector{Any}, lines::Vector{LineNumberNode})
     term = code[end]
     if !isa(term, GotoIfNot) && !isa(term, GotoNode) && !isa(term, ReturnNode)
         push!(code, ReturnNode{Any}())
-        push!(lines, LineNumberNode(0))
+        push!(lines, 0)
     end
     return code
 end
diff --git a/test/compiler/compiler.jl b/test/compiler/compiler.jl
index ee3bf7fb98b1f..d6c107b3f669b 100644
--- a/test/compiler/compiler.jl
+++ b/test/compiler/compiler.jl
@@ -1063,25 +1063,25 @@ function test_const_return(@nospecialize(f), @nospecialize(t), @nospecialize(val
     end
 end
 
-function find_call(code, func, narg)
-    for ex in code
+function find_call(code::Core.CodeInfo, @nospecialize(func), narg)
+    for ex in code.code
+        Meta.isexpr(ex, :(=)) && (ex = ex.args[2])
         isa(ex, Expr) || continue
-        ex = ex::Expr
         if ex.head === :call && length(ex.args) == narg
             farg = ex.args[1]
             if isa(farg, GlobalRef)
-                farg = farg::GlobalRef
                 if isdefined(farg.mod, farg.name) && isconst(farg.mod, farg.name)
-                    farg = getfield(farg.mod, farg.name)
+                    farg = typeof(getfield(farg.mod, farg.name))
                 end
+            elseif isa(farg, Core.SSAValue)
+                farg = code.ssavaluetypes[farg.id + 1]
+            else
+                farg = typeof(farg)
             end
-            if farg === func
+            if farg === typeof(func)
                 return true
             end
-        elseif Core.Compiler.is_meta_expr(ex)
-            continue
         end
-        find_call(ex.args, func, narg) && return true
     end
     return false
 end
@@ -1096,24 +1096,24 @@ test_const_return(()->sizeof(1 < 2), Tuple{}, 1)
 
 # Make sure Core.sizeof with a ::DataType as inferred input type is inferred but not constant.
 function sizeof_typeref(typeref)
-    Core.sizeof(typeref[])
+    return Core.sizeof(typeref[])
 end
 @test @inferred(sizeof_typeref(Ref{DataType}(Int))) == sizeof(Int)
-@test find_call(first(code_typed(sizeof_typeref, (Ref{DataType},))[1]).code, Core.sizeof, 2)
+@test find_call(first(code_typed(sizeof_typeref, (Ref{DataType},))[1]), Core.sizeof, 2)
 # Constant `Vector` can be resized and shouldn't be optimized to a constant.
 const constvec = [1, 2, 3]
 @eval function sizeof_constvec()
-    Core.sizeof($constvec)
+    return Core.sizeof($constvec)
 end
 @test @inferred(sizeof_constvec()) == sizeof(Int) * 3
-@test find_call(first(code_typed(sizeof_constvec, ())[1]).code, Core.sizeof, 2)
+@test find_call(first(code_typed(sizeof_constvec, ())[1]), Core.sizeof, 2)
 push!(constvec, 10)
 @test @inferred(sizeof_constvec()) == sizeof(Int) * 4
 
 test_const_return((x)->isdefined(x, :re), Tuple{ComplexF64}, true)
 isdefined_f3(x) = isdefined(x, 3)
 @test @inferred(isdefined_f3(())) == false
-@test find_call(first(code_typed(isdefined_f3, Tuple{Tuple{Vararg{Int}}})[1]).code, isdefined, 3)
+@test find_call(first(code_typed(isdefined_f3, Tuple{Tuple{Vararg{Int}}})[1]), isdefined, 3)
 
 let isa_tfunc = Core.Compiler.T_FFUNC_VAL[
         findfirst(x->x===isa, Core.Compiler.T_FFUNC_KEY)][3]