JuliaStats · nalimilan · Oct 19, 2017 · Sep 27, 2017 · Sep 27, 2017 · Oct 4, 2017
diff --git a/docs/src/da.md b/docs/src/da.md
@@ -5,8 +5,8 @@ CurrentModule = DataArrays
 ```
 
 ```@docs
-NA
-NAtype
+null
+Null
 ```
 
 ## Arrays with possibly missing data
@@ -19,9 +19,11 @@ DataArray
 DataVector
 DataMatrix
 @data
-isna
-dropna
-padna
+isnull
+Nulls.fail
+Nulls.skip
+Nulls.replace
+padnull
 levels
 ```
 

diff --git a/docs/src/index.md b/docs/src/index.md
@@ -1,11 +1,10 @@
 # DataArrays.jl
 
-This package provides functionality for working with [missing data](https://en.wikipedia.org/wiki/Missing_data)
-in Julia.
+This package provides array types for working efficiently with [missing data](https://en.wikipedia.org/wiki/Missing_data)
+in Julia, based on the `null` value from the [Nulls.jl](https://github.com/JuliaData/Nulls.jl) package.
 In particular, it provides the following:
 
-* `NA`: A singleton representing a missing value
-* `DataArray{T}`: An array type that can house both values of type `T` and missing values
+* `DataArray{T}`: An array type that can house both values of type `T` and missing values (of type `Null`)
 * `PooledDataArray{T}`: An array type akin to `DataArray` but optimized for arrays with a smaller set of unique
   values, as commonly occurs with categorical data
 

diff --git a/src/DataArrays.jl b/src/DataArrays.jl
@@ -4,6 +4,7 @@ module DataArrays
     using Base: promote_op
     using Base.Cartesian, Reexport
     @reexport using StatsBase
+    @reexport using Nulls
     using SpecialFunctions
 
     const DEFAULT_POOLED_REF_TYPE = UInt32
@@ -25,23 +26,15 @@ module DataArrays
            DataArray,
            DataMatrix,
            DataVector,
-           dropna,
-           each_failna,
-           each_dropna,
-           each_replacena,
-           EachFailNA,
-           EachDropNA,
-           EachReplaceNA,
+           EachFailNull,
+           EachDropNull,
+           EachReplaceNull,
            FastPerm,
            getpoolidx,
            gl,
            head,
-           isna,
            levels,
-           NA,
-           NAException,
-           NAtype,
-           padna,
+           padnull,
            pdata,
            PooledDataArray,
            PooledDataMatrix,
@@ -55,7 +48,6 @@ module DataArrays
            tail
 
     include("utils.jl")
-    include("natype.jl")
     include("abstractdataarray.jl")
     include("dataarray.jl")
     include("pooleddataarray.jl")
@@ -71,7 +63,6 @@ module DataArrays
     include("extras.jl")
     include("grouping.jl")
     include("statistics.jl")
-    include("predicates.jl")
     include("literals.jl")
     include("deprecated.jl")
 end
diff --git a/src/abstractdataarray.jl b/src/abstractdataarray.jl
@@ -2,9 +2,9 @@
     AbstractDataArray{T, N}
 
 An `N`-dimensional `AbstractArray` whose entries can take on values of type
-`T` or the value `NA`.
+`T` or the value `null`.
 """
-abstract type AbstractDataArray{T, N} <: AbstractArray{Data{T}, N} end
+abstract type AbstractDataArray{T, N} <: AbstractArray{Union{T,Null}, N} end
 
 """
     AbstractDataVector{T}
@@ -20,45 +20,43 @@ A 2-dimensional [`AbstractDataArray`](@ref) with element type `T`.
 """
 const AbstractDataMatrix{T} = AbstractDataArray{T, 2}
 
-Base.eltype(d::AbstractDataArray{T, N}) where {T, N} = Union{T,NAtype}
+Base.eltype(d::AbstractDataArray{T, N}) where {T, N} = Union{T,Null}
 
 # Generic iteration over AbstractDataArray's
 
 Base.start(x::AbstractDataArray) = 1
 Base.next(x::AbstractDataArray, state::Integer) = (x[state], state + 1)
 Base.done(x::AbstractDataArray, state::Integer) = state > length(x)
 
-Base.broadcast{T}(::typeof(isna), a::AbstractArray{T}) =
-    NAtype <: T ? BitArray(map(x->isa(x, NAtype), a)) : falses(size(a)) # -> BitArray
-
+# FIXME: type piracy (adds a Base.isnull method on AbstractArray; neither is owned here)
 """
-    isna(a::AbstractArray, i) -> Bool
+    isnull(a::AbstractArray, i) -> Bool
 
-Determine whether the element of `a` at index `i` is missing, i.e. `NA`.
+Determine whether the element of `a` at index `i` is missing, i.e. `null`.
 
 # Examples
 
 ```jldoctest
-julia> X = @data [1, 2, NA];
+julia> X = @data [1, 2, null];
 
-julia> isna(X, 2)
+julia> isnull(X, 2)
 false
 
-julia> isna(X, 3)
+julia> isnull(X, 3)
 true
 ```
 """
-isna(a::AbstractArray{T}, i::Real) where {T} = NAtype <: T ? isa(a[i], NAtype) : false # -> Bool
+Base.isnull(a::AbstractArray{T}, i::Real) where {T} = Null <: T ? isa(a[i], Null) : false # -> Bool
 
 """
     dropna(v::AbstractVector) -> AbstractVector
 
-Return a copy of `v` with all `NA` elements removed.
+Return a copy of `v` with all `null` elements removed.
 
 # Examples
 
 ```jldoctest
-julia> dropna(@data [NA, 1, NA, 2])
+julia> dropna(@data [null, 1, null, 2])
 2-element Array{Int64,1}:
  1
  2
@@ -76,53 +74,50 @@ dropna(v::AbstractVector) = copy(v) # -> AbstractVector
 # TODO: Use values()
 #       Use DataValueIterator type?
 
-struct EachFailNA{T}
+struct EachFailNull{T}
     da::AbstractDataArray{T}
 end
-each_failna(da::AbstractDataArray{T}) where {T} = EachFailNA(da)
-Base.length(itr::EachFailNA) = length(itr.da)
-Base.start(itr::EachFailNA) = 1
-Base.done(itr::EachFailNA, ind::Integer) = ind > length(itr)
-function Base.next(itr::EachFailNA, ind::Integer)
-    if isna(itr.da[ind])
-        throw(NAException())
+Nulls.fail(da::AbstractDataArray{T}) where {T} = EachFailNull(da)
+Base.length(itr::EachFailNull) = length(itr.da)
+Base.start(itr::EachFailNull) = 1
+Base.done(itr::EachFailNull, ind::Integer) = ind > length(itr)
+function Base.next(itr::EachFailNull, ind::Integer)
+    if isnull(itr.da[ind])
+        throw(NullException())
     else
         (itr.da[ind], ind + 1)
     end
 end
 
-struct EachDropNA{T}
+struct EachDropNull{T}
     da::AbstractDataArray{T}
 end
-each_dropna(da::AbstractDataArray{T}) where {T} = EachDropNA(da)
+Nulls.skip(da::AbstractDataArray{T}) where {T} = EachDropNull(da)
 function _next_nonna_ind(da::AbstractDataArray{T}, ind::Int) where T
     ind += 1
-    while ind <= length(da) && isna(da, ind)
+    while ind <= length(da) && isnull(da, ind)
         ind += 1
     end
     ind
 end
-Base.length(itr::EachDropNA) = length(itr.da) - sum(itr.da.na)
-Base.start(itr::EachDropNA) = _next_nonna_ind(itr.da, 0)
-Base.done(itr::EachDropNA, ind::Int) = ind > length(itr.da)
-function Base.next(itr::EachDropNA, ind::Int)
+Base.length(itr::EachDropNull) = length(itr.da) - sum(itr.da.na)
+Base.start(itr::EachDropNull) = _next_nonna_ind(itr.da, 0)
+Base.done(itr::EachDropNull, ind::Int) = ind > length(itr.da)
+function Base.next(itr::EachDropNull, ind::Int)
     (itr.da[ind], _next_nonna_ind(itr.da, ind))
 end
 
-struct EachReplaceNA{S, T}
+struct EachReplaceNull{S, T}
     da::AbstractDataArray{S}
     replacement::T
 end
-function each_replacena(da::AbstractDataArray, replacement::Any)
-    EachReplaceNA(da, convert(eltype(da), replacement))
-end
-function each_replacena(replacement::Any)
-    x -> each_replacena(x, replacement)
+function Nulls.replace(da::AbstractDataArray, replacement::Any)
+    EachReplaceNull(da, convert(eltype(da), replacement))
 end
-Base.length(itr::EachReplaceNA) = length(itr.da)
-Base.start(itr::EachReplaceNA) = 1
-Base.done(itr::EachReplaceNA, ind::Integer) = ind > length(itr)
-function Base.next(itr::EachReplaceNA, ind::Integer)
-    item = isna(itr.da, ind) ? itr.replacement : itr.da[ind]
+Base.length(itr::EachReplaceNull) = length(itr.da)
+Base.start(itr::EachReplaceNull) = 1
+Base.done(itr::EachReplaceNull, ind::Integer) = ind > length(itr)
+function Base.next(itr::EachReplaceNull, ind::Integer)
+    item = isnull(itr.da, ind) ? itr.replacement : itr.da[ind]
     (item, ind + 1)
 end
diff --git a/src/broadcast.jl b/src/broadcast.jl
@@ -5,24 +5,24 @@ _broadcast_shape(x...) = Base.to_shape(Base.Broadcast.broadcast_indices(x...))
 
 # Get ref for value for a PooledDataArray, adding to the pool if
 # necessary
-_unsafe_pdaref!(Bpool, Brefdict::Dict, val::NAtype) = 0
+_unsafe_pdaref!(Bpool, Brefdict::Dict, val::Null) = 0
 function _unsafe_pdaref!(Bpool, Brefdict::Dict, val)
     @get! Brefdict val begin
         push!(Bpool, val)
         length(Bpool)
     end
 end
 
-# Generate a branch for each possible combination of NA/not NA. This
+# Generate a branch for each possible combination of null/not null. This
 # gives good performance at the cost of 2^narrays branches.
 function gen_na_conds(f, nd, arrtype, outtype,
-    daidx=find(t -> t <: DataArray || t <: PooledDataArray, arrtype), pos=1, isna=())
+    daidx=find(t -> t <: DataArray || t <: PooledDataArray, arrtype), pos=1, isnull=())
 
     if pos > length(daidx)
         args = Any[Symbol("v_$(k)") for k = 1:length(arrtype)]
         for i = 1:length(daidx)
-            if isna[i]
-                args[daidx[i]] = NA
+            if isnull[i]
+                args[daidx[i]] = null
             end
         end
 
@@ -39,15 +39,15 @@ function gen_na_conds(f, nd, arrtype, outtype,
     else
         k = daidx[pos]
         quote
-            if $(Symbol("isna_$(k)"))
-                $(gen_na_conds(f, nd, arrtype, outtype, daidx, pos+1, tuple(isna..., true)))
+            if $(Symbol("isnull_$(k)"))
+                $(gen_na_conds(f, nd, arrtype, outtype, daidx, pos+1, tuple(isnull..., true)))
             else
                 $(if arrtype[k] <: DataArray
                     :(@inbounds $(Symbol("v_$(k)")) = $(Symbol("data_$(k)"))[$(Symbol("state_$(k)_0"))])
                 else
                     :(@inbounds $(Symbol("v_$(k)")) = $(Symbol("pool_$(k)"))[$(Symbol("r_$(k)"))])
                 end)
-                $(gen_na_conds(f, nd, arrtype, outtype, daidx, pos+1, tuple(isna..., false)))
+                $(gen_na_conds(f, nd, arrtype, outtype, daidx, pos+1, tuple(isnull..., false)))
             end
         end
     end
@@ -128,13 +128,13 @@ Base.map!(f::F, B::Union{DataArray, PooledDataArray}, A0, As...) where {F} =
 
             # body
             begin
-                # Advance iterators for DataArray and determine NA status
+                # Advance iterators for DataArray and determine null status
                 $(Expr(:block, [
                     As[k] <: DataArray ? quote
-                        @inbounds $(Symbol("isna_$(k)")) = Base.unsafe_bitgetindex($(Symbol("na_$(k)")), $(Symbol("state_$(k)_0")))
+                        @inbounds $(Symbol("isnull_$(k)")) = Base.unsafe_bitgetindex($(Symbol("na_$(k)")), $(Symbol("state_$(k)_0")))
                     end : As[k] <: PooledDataArray ? quote
                         @inbounds $(Symbol("r_$(k)")) = @nref $nd $(Symbol("refs_$(k)")) d->$(Symbol("j_$(k)_d"))
-                        $(Symbol("isna_$(k)")) = $(Symbol("r_$(k)")) == 0
+                        $(Symbol("isnull_$(k)")) = $(Symbol("r_$(k)")) == 0
                     end : nothing
                 for k = 1:N]...))
 
@@ -190,20 +190,20 @@ Base.Broadcast._containertype(::Type{T}) where T<:PooledDataArray     = PooledDa
 Base.Broadcast.broadcast_indices(::Type{T}, A) where T<:AbstractDataArray = indices(A)
 
 @inline function broadcast_t(f, ::Type{T}, shape, A, Bs...) where {T}
-    dest = Base.Broadcast.containertype(A, Bs...)(extractT(T), Base.index_lengths(shape...))
+    dest = Base.Broadcast.containertype(A, Bs...)(Nulls.T(T), Base.index_lengths(shape...))
     return broadcast!(f, dest, A, Bs...)
 end
 
-# This is mainly to handle isna.(x) since isna is probably the only
-# function that can guarantee that NAs will never propagate
+# This is mainly to handle isnull.(x) since isnull is probably the only
+# function that can guarantee that nulls will never propagate
 @inline function broadcast_t(f, ::Type{Bool}, shape, A, Bs...)
     dest = similar(BitArray, shape)
     return broadcast!(f, dest, A, Bs...)
 end
 
 # This one is almost identical to the version in Base and can hopefully be
 # removed at some point. The main issue in Base is that it tests for
-# isleaftype(T) which is false for Union{T,NAtype}. If the test in Base
+# isleaftype(T) which is false for Union{T,Null}. If the test in Base
 # can be modified to cover simple unions of leaftypes then this method
 # can probably be deleted and the two _t methods adjusted to match the Base
 # invokation from Base.Broadcast.broadcast_c
@@ -214,5 +214,5 @@ end
 end
 
 # This one is much faster than normal broadcasting but the method won't get called
-# in fusing operations like (!).(isna.(x))
-Base.broadcast(::typeof(isna), da::DataArray) = copy(da.na)
+# in fusing operations like (!).(isnull.(x))
+Base.broadcast(::typeof(isnull), da::DataArray) = copy(da.na)