diff --git a/.travis.yml b/.travis.yml index ea0a185852..42e740c899 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,7 +1,6 @@ language: julia julia: - - 0.4 - 0.5 - nightly os: @@ -15,6 +14,5 @@ script: - if [[ -a .git/shallow ]]; then git fetch --unshallow; fi - julia --check-bounds=yes -e 'Pkg.clone(pwd()); Pkg.build("DataFrames"); Pkg.test("DataFrames"; coverage=true)' after_success: - - julia -e 'cd(Pkg.dir("DataFrames")); Pkg.clone("https://github.com/MichaelHatherly/Documenter.jl"); include(joinpath("docs", "make.jl"))' + - julia -e 'cd(Pkg.dir("DataFrames")); Pkg.add("Documenter"); include(joinpath("docs", "make.jl"))' - julia -e 'cd(Pkg.dir("DataFrames")); Pkg.add("Coverage"); using Coverage; Coveralls.submit(Coveralls.process_folder())' - diff --git a/REQUIRE b/REQUIRE index b20aea22d9..79622049cf 100644 --- a/REQUIRE +++ b/REQUIRE @@ -1,6 +1,7 @@ -julia 0.4 -DataArrays 0.3.4 -StatsBase 0.8.3 +julia 0.5 +NullableArrays 0.0.8 +CategoricalArrays 0.0.6 +StatsBase 0.11.0 GZip SortingAlgorithms Reexport diff --git a/appveyor.yml b/appveyor.yml index cfc1085114..84c37acbda 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -1,7 +1,5 @@ environment: matrix: - - JULIAVERSION: "julialang/bin/winnt/x86/0.4/julia-0.4-latest-win32.exe" - - JULIAVERSION: "julialang/bin/winnt/x64/0.4/julia-0.4-latest-win64.exe" - JULIAVERSION: "julialang/bin/winnt/x86/0.5/julia-0.5-latest-win32.exe" - JULIAVERSION: "julialang/bin/winnt/x64/0.5/julia-0.5-latest-win64.exe" - JULIAVERSION: "julianightlies/bin/winnt/x86/julia-latest-win32.exe" diff --git a/benchmark/datamatrix.jl b/benchmark/datamatrix.jl deleted file mode 100644 index c856a15020..0000000000 --- a/benchmark/datamatrix.jl +++ /dev/null @@ -1,37 +0,0 @@ -a = eye(100) -b = eye(100) - -dm_a = data(a) -dm_b = data(b) - -dm_a_na = copy(dm_a) -dm_a_na[:, :] = NA -dm_b_na = copy(dm_b) -dm_b_na[:, :] = NA - -f1() = *(a, b) -f2() = *(dm_a, dm_b) -f3() = *(dm_a_na, dm_b_na) - -df1 = benchmark(f1, - "Linear Algebra", - "Matrix 
Multiplication w/ No NA's", - 1_000) -df2 = benchmark(f2, - "Linear Algebra", - "DataMatrix Multiplication w/ No NA's", - 1_000) -df3 = benchmark(f3, - "Linear Algebra", - "DataMatrix Multiplication w/ NA's", - 1_000) - -# TODO: Keep permanent record -printtable(vcat(df1, df2, df3), header=false) - -# Compare with R -# We're 10x as fast! -# a <- diag(100) -# b <- diag(100) -# a %*% b -# s <- Sys.time(); a %*% b; e <- Sys.time(); e - s diff --git a/benchmark/datavector.jl b/benchmark/datavector.jl deleted file mode 100644 index 1139fa9114..0000000000 --- a/benchmark/datavector.jl +++ /dev/null @@ -1,56 +0,0 @@ -srand(1) -N = 1_000_000 -v = randn(N) -dv = DataArray(v) -dvna = deepcopy(dv) -dvna[rand(1:N, 10_000)] = NA -idxv = shuffle([1:N]) -idxdv = DataArray(idxv) - -f1(v) = sum(v) -f2(v) = sum(dropna(v)) -f3(v) = sum(dropna(v)) # Make this an iterator -f4(v) = mean(v) -f5(v) = mean(dropna(v)) -f6(v) = mean(dropna(v)) # Make this an iterator -f7(v1, v2) = v1 + v2 -f8(v1, v2) = v1 .> v2 -f9(v, i) = v[i] - -perf_test = Dict() - -perf_test["sum(v): Vector with no NA's"] = () -> f1(v) -perf_test["sum(dv): DataVector with no NA's"] = () -> f1(dv) -perf_test["sum(dropna(dv)): DataVector with no NA's"] = () -> f2(dv) -perf_test["sum(*dropna(dv)): DataVector with no NA's"] = () -> f3(dv) - -perf_test["sum(dvna): DataVector with NA's"] = () -> f4(dv) -perf_test["sum(dropna(dvna)): DataVector with NA's"] = () -> f5(dv) -perf_test["sum(*dropna(dvna)): DataVector with NA's"] = () -> f6(dv) - -perf_test["mean(v): Vector with no NA's"] = () -> f4(v) -perf_test["mean(dv): DataVector with no NA's"] = () -> f4(dv) -perf_test["mean(dropna(dv)): DataVector with no NA's"] = () -> f5(dv) -perf_test["mean(*dropna(dv)): DataVector with no NA's"] = () -> f6(dv) - -perf_test["mean(dvna): DataVector with NA's"] = () -> f4(dv) -perf_test["mean(dropna(dvna)): DataVector with NA's"] = () -> f5(dv) -perf_test["mean(*dropna(dvna)): DataVector with NA's"] = () -> f6(dv) - -perf_test["v + 1.0 : 
Vector"] = () -> f7(v, 1.0) -perf_test["dv + 1.0 : DataVector with no NA's"] = () -> f7(dv, 1.0) -perf_test["dvna + 1.0 : DataVector with NA's"] = () -> f7(dvna, 1.0) - -perf_test["v .> 1.0 : Vector"] = () -> f8(v, 1.0) -perf_test["dv .> 1.0 : DataVector with no NA's"] = () -> f8(dv, 1.0) -perf_test["dvna .> 1.0 : DataVector with NA's"] = () -> f8(dvna, 1.0) - -perf_test["v[idxv] : Vector"] = () -> f9(v, idxv) -perf_test["dv[idxv] : DataVector and Vector indexing"] = () -> f9(dv, idxv) -perf_test["dv[idxdv] : DataVector and DataVector indexing"] = () -> f9(dv, idxdv) - -for (name, f) in perf_test - res = benchmark(f, "DataArray Operations", name, 10) - # TODO: Keep permanent record - printtable(res, header=false) -end diff --git a/benchmark/results.csv b/benchmark/results.csv index 803a8f1963..2f828b1374 100644 --- a/benchmark/results.csv +++ b/benchmark/results.csv @@ -1,27 +1,4 @@ Category,Benchmark,Iterations,TotalWall,AverageWall,MaxWall,MinWall,Timestamp,JuliaHash,CodeHash,OS,CPUCores -"DataArray Operations","sum(v): Vector with no NA's",10,0.00857686996459961,0.000857686996459961,0.0008759498596191406,0.0008528232574462891,"2013-01-14 10:20:09","9e0ff15b52","61162cd918","Darwin",4 -"DataArray Operations","mean(dv): DataVector with no NA's",10,0.01034688949584961,0.001034688949584961,0.0015439987182617188,0.0009601116180419922,"2013-01-14 10:20:09","9e0ff15b52","61162cd918","Darwin",4 -"DataArray Operations","mean(removeNA(dvna)): DataVector with NA's",10,0.3172330856323242,0.031723308563232425,0.03600907325744629,0.031102895736694336,"2013-01-14 10:20:10","9e0ff15b52","61162cd918","Darwin",4 -"DataArray Operations","dv .> 1.0 : DataVector with no NA's",10,2.0662119388580322,0.20662119388580322,0.2186141014099121,0.19791007041931152,"2013-01-14 10:20:12","9e0ff15b52","61162cd918","Darwin",4 -"DataArray Operations","dv[idxdv] : DataVector and DataVector indexing",10,1.309283971786499,0.1309283971786499,0.1430819034576416,0.12758612632751465,"2013-01-14 
10:20:14","9e0ff15b52","61162cd918","Darwin",4 -"DataArray Operations","mean(v): Vector with no NA's",10,0.008614063262939453,0.0008614063262939453,0.0008738040924072266,0.0008530616760253906,"2013-01-14 10:20:14","9e0ff15b52","61162cd918","Darwin",4 -"DataArray Operations","dv + 1.0 : DataVector with no NA's",10,0.06403565406799316,0.006403565406799316,0.018197059631347656,0.002321958541870117,"2013-01-14 10:20:14","9e0ff15b52","61162cd918","Darwin",4 -"DataArray Operations","v .> 1.0 : Vector",10,0.6535539627075195,0.06535539627075196,0.08094000816345215,0.059654951095581055,"2013-01-14 10:20:15","9e0ff15b52","61162cd918","Darwin",4 -"DataArray Operations","dv[idxv] : DataVector and Vector indexing",10,1.0075325965881348,0.10075325965881347,0.10670304298400879,0.09811711311340332,"2013-01-14 10:20:16","9e0ff15b52","61162cd918","Darwin",4 -"DataArray Operations","sum(removeNA(dv)): DataVector with no NA's",10,0.31229615211486816,0.031229615211486816,0.03513312339782715,0.029542922973632812,"2013-01-14 10:20:16","9e0ff15b52","61162cd918","Darwin",4 -"DataArray Operations","v[idxv] : Vector",10,0.17796993255615234,0.017796993255615234,0.03006291389465332,0.01340794563293457,"2013-01-14 10:20:16","9e0ff15b52","61162cd918","Darwin",4 -"DataArray Operations","dvna .> 1.0 : DataVector with NA's",10,2.047502040863037,0.2047502040863037,0.21759915351867676,0.19756603240966797,"2013-01-14 10:20:19","9e0ff15b52","61162cd918","Darwin",4 -"DataArray Operations","sum(dvna): DataVector with NA's",10,0.009662866592407227,0.0009662866592407227,0.0009889602661132812,0.0009551048278808594,"2013-01-14 10:20:19","9e0ff15b52","61162cd918","Darwin",4 -"DataArray Operations","mean(*removeNA(dv)): DataVector with no NA's",10,0.33266425132751465,0.033266425132751465,0.03953218460083008,0.031510114669799805,"2013-01-14 10:20:19","9e0ff15b52","61162cd918","Darwin",4 -"DataArray Operations","mean(*removeNA(dvna)): DataVector with 
NA's",10,0.3514890670776367,0.035148906707763675,0.04177212715148926,0.030744075775146484,"2013-01-14 10:20:20","9e0ff15b52","61162cd918","Darwin",4 -"DataArray Operations","dvna + 1.0 : DataVector with NA's",10,0.06465601921081543,0.006465601921081543,0.018489837646484375,0.0023229122161865234,"2013-01-14 10:20:20","9e0ff15b52","61162cd918","Darwin",4 -"DataArray Operations","sum(*removeNA(dvna)): DataVector with NA's",10,0.3297770023345947,0.03297770023345947,0.039092063903808594,0.031161069869995117,"2013-01-14 10:20:20","9e0ff15b52","61162cd918","Darwin",4 -"DataArray Operations","sum(dv): DataVector with no NA's",10,0.009810924530029297,0.0009810924530029297,0.0010459423065185547,0.0009570121765136719,"2013-01-14 10:20:20","9e0ff15b52","61162cd918","Darwin",4 -"DataArray Operations","sum(removeNA(dvna)): DataVector with NA's",10,0.3214747905731201,0.03214747905731201,0.03607583045959473,0.03061199188232422,"2013-01-14 10:20:20","9e0ff15b52","61162cd918","Darwin",4 -"DataArray Operations","mean(removeNA(dv)): DataVector with no NA's",10,0.31479310989379883,0.03147931098937988,0.03677701950073242,0.030520200729370117,"2013-01-14 10:20:21","9e0ff15b52","61162cd918","Darwin",4 -"DataArray Operations","v + 1.0 : Vector",10,0.06811809539794922,0.006811809539794922,0.030359983444213867,0.0016188621520996094,"2013-01-14 10:20:21","9e0ff15b52","61162cd918","Darwin",4 -"DataArray Operations","sum(*removeNA(dv)): DataVector with no NA's",10,0.313723087310791,0.0313723087310791,0.036063194274902344,0.030745983123779297,"2013-01-14 10:20:21","9e0ff15b52","61162cd918","Darwin",4 -"DataArray Operations","mean(dvna): DataVector with NA's",10,0.010206937789916992,0.0010206937789916993,0.001260995864868164,0.0009670257568359375,"2013-01-14 10:20:21","9e0ff15b52","61162cd918","Darwin",4 "Linear Algebra","Matrix Multiplication w/ No NA's",1000,0.1434164047241211,0.0001434164047241211,0.03787708282470703,5.888938903808594e-5,"2013-01-14 
10:20:22","9e0ff15b52","61162cd918","Darwin",4 "Linear Algebra","DataMatrix Multiplication w/ No NA's",1000,2.0335049629211426,0.0020335049629211428,0.025124788284301758,0.001116037368774414,"2013-01-14 10:20:24","9e0ff15b52","61162cd918","Darwin",4 "Linear Algebra","DataMatrix Multiplication w/ NA's",1000,4.978086709976196,0.004978086709976196,0.038012027740478516,0.003064870834350586,"2013-01-14 10:20:29","9e0ff15b52","61162cd918","Darwin",4 @@ -40,29 +17,6 @@ Category,Benchmark,Iterations,TotalWall,AverageWall,MaxWall,MinWall,Timestamp,Ju "DataFrame I/O","space_before_delimiter.csv",10,0.007896661758422852,0.0007896661758422851,0.0009069442749023438,0.0005939006805419922,"2013-01-14 10:21:54","9e0ff15b52","61162cd918","Darwin",4 "DataFrame I/O","types.csv",10,0.01001596450805664,0.001001596450805664,0.0011119842529296875,0.0007750988006591797,"2013-01-14 10:21:54","9e0ff15b52","61162cd918","Darwin",4 "DataFrame I/O","utf8.csv",10,0.007441043853759766,0.0007441043853759766,0.0008280277252197266,0.0007090568542480469,"2013-01-14 10:21:54","9e0ff15b52","61162cd918","Darwin",4 -"DataArray Operations","sum(v): Vector with no NA's",10,0.009074211120605469,0.0009074211120605469,0.0009720325469970703,0.0008630752563476562,"2013-01-14 10:44:57","11f365ef08","319eab675d","Darwin",4 -"DataArray Operations","mean(dv): DataVector with no NA's",10,0.00978994369506836,0.000978994369506836,0.0010378360748291016,0.0009520053863525391,"2013-01-14 10:44:57","11f365ef08","319eab675d","Darwin",4 -"DataArray Operations","mean(removeNA(dvna)): DataVector with NA's",10,0.370542049407959,0.0370542049407959,0.07280802726745605,0.032627105712890625,"2013-01-14 10:44:58","11f365ef08","319eab675d","Darwin",4 -"DataArray Operations","dv .> 1.0 : DataVector with no NA's",10,2.0874040126800537,0.20874040126800536,0.22231101989746094,0.2008979320526123,"2013-01-14 10:45:00","11f365ef08","319eab675d","Darwin",4 -"DataArray Operations","dv[idxdv] : DataVector and DataVector 
indexing",10,1.3245141506195068,0.13245141506195068,0.13637208938598633,0.12865090370178223,"2013-01-14 10:45:02","11f365ef08","319eab675d","Darwin",4 -"DataArray Operations","mean(v): Vector with no NA's",10,0.010338783264160156,0.0010338783264160155,0.001631021499633789,0.0008609294891357422,"2013-01-14 10:45:02","11f365ef08","319eab675d","Darwin",4 -"DataArray Operations","dv + 1.0 : DataVector with no NA's",10,0.06729602813720703,0.006729602813720703,0.019953012466430664,0.0023021697998046875,"2013-01-14 10:45:02","11f365ef08","319eab675d","Darwin",4 -"DataArray Operations","v .> 1.0 : Vector",10,0.6516821384429932,0.06516821384429931,0.07804393768310547,0.059031009674072266,"2013-01-14 10:45:03","11f365ef08","319eab675d","Darwin",4 -"DataArray Operations","dv[idxv] : DataVector and Vector indexing",10,1.0018041133880615,0.10018041133880615,0.10875797271728516,0.09689211845397949,"2013-01-14 10:45:04","11f365ef08","319eab675d","Darwin",4 -"DataArray Operations","sum(removeNA(dv)): DataVector with no NA's",10,0.3324899673461914,0.03324899673461914,0.03690004348754883,0.029747962951660156,"2013-01-14 10:45:04","11f365ef08","319eab675d","Darwin",4 -"DataArray Operations","v[idxv] : Vector",10,0.21311020851135254,0.021311020851135253,0.03587007522583008,0.01586008071899414,"2013-01-14 10:45:05","11f365ef08","319eab675d","Darwin",4 -"DataArray Operations","dvna .> 1.0 : DataVector with NA's",10,2.057245969772339,0.2057245969772339,0.21527600288391113,0.19754600524902344,"2013-01-14 10:45:07","11f365ef08","319eab675d","Darwin",4 -"DataArray Operations","sum(dvna): DataVector with NA's",10,0.010241031646728516,0.0010241031646728515,0.0012030601501464844,0.0009589195251464844,"2013-01-14 10:45:07","11f365ef08","319eab675d","Darwin",4 -"DataArray Operations","mean(*removeNA(dv)): DataVector with no NA's",10,0.32787513732910156,0.032787513732910153,0.03671598434448242,0.03182506561279297,"2013-01-14 10:45:07","11f365ef08","319eab675d","Darwin",4 -"DataArray 
Operations","mean(*removeNA(dvna)): DataVector with NA's",10,0.3225698471069336,0.03225698471069336,0.03831791877746582,0.03119802474975586,"2013-01-14 10:45:08","11f365ef08","319eab675d","Darwin",4 -"DataArray Operations","dvna + 1.0 : DataVector with NA's",10,0.0656731128692627,0.006567311286926269,0.0189821720123291,0.002315044403076172,"2013-01-14 10:45:08","11f365ef08","319eab675d","Darwin",4 -"DataArray Operations","sum(*removeNA(dvna)): DataVector with NA's",10,0.33132195472717285,0.033132195472717285,0.04097390174865723,0.031419992446899414,"2013-01-14 10:45:08","11f365ef08","319eab675d","Darwin",4 -"DataArray Operations","sum(dv): DataVector with no NA's",10,0.009993314743041992,0.0009993314743041992,0.0010750293731689453,0.0009589195251464844,"2013-01-14 10:45:08","11f365ef08","319eab675d","Darwin",4 -"DataArray Operations","sum(removeNA(dvna)): DataVector with NA's",10,0.32796382904052734,0.032796382904052734,0.03624391555786133,0.030983924865722656,"2013-01-14 10:45:09","11f365ef08","319eab675d","Darwin",4 -"DataArray Operations","mean(removeNA(dv)): DataVector with no NA's",10,0.3377056121826172,0.03377056121826172,0.03966093063354492,0.03125596046447754,"2013-01-14 10:45:09","11f365ef08","319eab675d","Darwin",4 -"DataArray Operations","v + 1.0 : Vector",10,0.06936144828796387,0.0069361448287963865,0.030318021774291992,0.0016639232635498047,"2013-01-14 10:45:09","11f365ef08","319eab675d","Darwin",4 -"DataArray Operations","sum(*removeNA(dv)): DataVector with no NA's",10,0.325742244720459,0.0325742244720459,0.035830020904541016,0.031311988830566406,"2013-01-14 10:45:09","11f365ef08","319eab675d","Darwin",4 -"DataArray Operations","mean(dvna): DataVector with NA's",10,0.009889841079711914,0.0009889841079711915,0.0011150836944580078,0.0009551048278808594,"2013-01-14 10:45:09","11f365ef08","319eab675d","Darwin",4 "Linear Algebra","Matrix Multiplication w/ No 
NA's",1000,0.16434788703918457,0.00016434788703918458,0.03700113296508789,5.888938903808594e-5,"2013-01-14 10:45:10","11f365ef08","319eab675d","Darwin",4 "Linear Algebra","DataMatrix Multiplication w/ No NA's",1000,2.043034553527832,0.002043034553527832,0.02640819549560547,0.0011169910430908203,"2013-01-14 10:45:12","11f365ef08","319eab675d","Darwin",4 "Linear Algebra","DataMatrix Multiplication w/ NA's",1000,5.303671598434448,0.005303671598434448,0.04494500160217285,0.0030400753021240234,"2013-01-14 10:45:17","11f365ef08","319eab675d","Darwin",4 @@ -81,29 +35,6 @@ Category,Benchmark,Iterations,TotalWall,AverageWall,MaxWall,MinWall,Timestamp,Ju "DataFrame I/O","space_before_delimiter.csv",10,0.005663871765136719,0.0005663871765136719,0.0005869865417480469,0.0005459785461425781,"2013-01-14 10:46:44","11f365ef08","319eab675d","Darwin",4 "DataFrame I/O","types.csv",10,0.006924867630004883,0.0006924867630004883,0.0007150173187255859,0.0006740093231201172,"2013-01-14 10:46:44","11f365ef08","319eab675d","Darwin",4 "DataFrame I/O","utf8.csv",10,0.007310152053833008,0.0007310152053833008,0.0008001327514648438,0.0007109642028808594,"2013-01-14 10:46:44","11f365ef08","319eab675d","Darwin",4 -"DataArray Operations","sum(v): Vector with no NA's",10,0.008659124374389648,0.0008659124374389649,0.0009109973907470703,0.0008530616760253906,"2013-01-14 21:09:54","3549f803f9","3b67c77708","Darwin",4 -"DataArray Operations","mean(dv): DataVector with no NA's",10,0.010535955429077148,0.001053595542907715,0.0016469955444335938,0.0009620189666748047,"2013-01-14 21:09:55","3549f803f9","3b67c77708","Darwin",4 -"DataArray Operations","mean(removeNA(dvna)): DataVector with NA's",10,0.31494879722595215,0.031494879722595216,0.036512136459350586,0.0302579402923584,"2013-01-14 21:09:55","3549f803f9","3b67c77708","Darwin",4 -"DataArray Operations","dv .> 1.0 : DataVector with no NA's",10,2.0206620693206787,0.20206620693206787,0.21630287170410156,0.19572210311889648,"2013-01-14 
21:09:57","3549f803f9","3b67c77708","Darwin",4 -"DataArray Operations","dv[idxdv] : DataVector and DataVector indexing",10,1.2879621982574463,0.12879621982574463,0.13094496726989746,0.1274411678314209,"2013-01-14 21:09:59","3549f803f9","3b67c77708","Darwin",4 -"DataArray Operations","mean(v): Vector with no NA's",10,0.010784149169921875,0.0010784149169921875,0.0013751983642578125,0.0008599758148193359,"2013-01-14 21:09:59","3549f803f9","3b67c77708","Darwin",4 -"DataArray Operations","dv + 1.0 : DataVector with no NA's",10,0.06326985359191895,0.006326985359191894,0.018002986907958984,0.002331972122192383,"2013-01-14 21:09:59","3549f803f9","3b67c77708","Darwin",4 -"DataArray Operations","v .> 1.0 : Vector",10,0.6304340362548828,0.06304340362548828,0.07402396202087402,0.058135032653808594,"2013-01-14 21:10:00","3549f803f9","3b67c77708","Darwin",4 -"DataArray Operations","dv[idxv] : DataVector and Vector indexing",10,0.9911110401153564,0.09911110401153564,0.10813021659851074,0.09464001655578613,"2013-01-14 21:10:01","3549f803f9","3b67c77708","Darwin",4 -"DataArray Operations","sum(removeNA(dv)): DataVector with no NA's",10,0.323793888092041,0.0323793888092041,0.03618288040161133,0.03157186508178711,"2013-01-14 21:10:01","3549f803f9","3b67c77708","Darwin",4 -"DataArray Operations","v[idxv] : Vector",10,0.1790471076965332,0.01790471076965332,0.030202150344848633,0.013410091400146484,"2013-01-14 21:10:02","3549f803f9","3b67c77708","Darwin",4 -"DataArray Operations","dvna .> 1.0 : DataVector with NA's",10,2.020591974258423,0.20205919742584227,0.21394085884094238,0.19475698471069336,"2013-01-14 21:10:04","3549f803f9","3b67c77708","Darwin",4 -"DataArray Operations","sum(dvna): DataVector with NA's",10,0.009839057922363281,0.0009839057922363282,0.0010139942169189453,0.000965118408203125,"2013-01-14 21:10:04","3549f803f9","3b67c77708","Darwin",4 -"DataArray Operations","mean(*removeNA(dv)): DataVector with no 
NA's",10,0.3176560401916504,0.03176560401916504,0.036063194274902344,0.02964496612548828,"2013-01-14 21:10:04","3549f803f9","3b67c77708","Darwin",4 -"DataArray Operations","mean(*removeNA(dvna)): DataVector with NA's",10,0.3231468200683594,0.03231468200683594,0.03954195976257324,0.029547929763793945,"2013-01-14 21:10:05","3549f803f9","3b67c77708","Darwin",4 -"DataArray Operations","dvna + 1.0 : DataVector with NA's",10,0.06484484672546387,0.006484484672546387,0.018603086471557617,0.002290964126586914,"2013-01-14 21:10:05","3549f803f9","3b67c77708","Darwin",4 -"DataArray Operations","sum(*removeNA(dvna)): DataVector with NA's",10,0.32024693489074707,0.032024693489074704,0.036015987396240234,0.0309140682220459,"2013-01-14 21:10:05","3549f803f9","3b67c77708","Darwin",4 -"DataArray Operations","sum(dv): DataVector with no NA's",10,0.009638071060180664,0.0009638071060180664,0.0009829998016357422,0.0009531974792480469,"2013-01-14 21:10:05","3549f803f9","3b67c77708","Darwin",4 -"DataArray Operations","sum(removeNA(dvna)): DataVector with NA's",10,0.3199589252471924,0.03199589252471924,0.03625988960266113,0.030423879623413086,"2013-01-14 21:10:06","3549f803f9","3b67c77708","Darwin",4 -"DataArray Operations","mean(removeNA(dv)): DataVector with no NA's",10,0.31856274604797363,0.03185627460479736,0.03632402420043945,0.031010150909423828,"2013-01-14 21:10:06","3549f803f9","3b67c77708","Darwin",4 -"DataArray Operations","v + 1.0 : Vector",10,0.07034850120544434,0.007034850120544433,0.030943870544433594,0.0016410350799560547,"2013-01-14 21:10:06","3549f803f9","3b67c77708","Darwin",4 -"DataArray Operations","sum(*removeNA(dv)): DataVector with no NA's",10,0.3294239044189453,0.03294239044189453,0.03928399085998535,0.030253887176513672,"2013-01-14 21:10:06","3549f803f9","3b67c77708","Darwin",4 -"DataArray Operations","mean(dvna): DataVector with NA's",10,0.010222911834716797,0.0010222911834716797,0.0012698173522949219,0.0009789466857910156,"2013-01-14 
21:10:06","3549f803f9","3b67c77708","Darwin",4 "Linear Algebra","Matrix Multiplication w/ No NA's",1000,0.1328754425048828,0.00013287544250488282,0.011707067489624023,5.888938903808594e-5,"2013-01-14 21:10:07","3549f803f9","3b67c77708","Darwin",4 "Linear Algebra","DataMatrix Multiplication w/ No NA's",1000,1.9026072025299072,0.0019026072025299073,0.026885986328125,0.0011301040649414062,"2013-01-14 21:10:09","3549f803f9","3b67c77708","Darwin",4 "Linear Algebra","DataMatrix Multiplication w/ NA's",1000,4.693065643310547,0.004693065643310547,0.034111976623535156,0.0031609535217285156,"2013-01-14 21:10:14","3549f803f9","3b67c77708","Darwin",4 diff --git a/benchmark/runbenchmarks.jl b/benchmark/runbenchmarks.jl index fa9859b406..adbcef595e 100644 --- a/benchmark/runbenchmarks.jl +++ b/benchmark/runbenchmarks.jl @@ -5,9 +5,7 @@ using DataFrames using Benchmark -benchmarks = ["datavector.jl", - "datamatrix.jl", - "io.jl"] +benchmarks = [ "io.jl"] # TODO: Print summary to stdout_stream, while printing results # to file with appends. diff --git a/docs/make.jl b/docs/make.jl index cfd5a2df8a..467ceb2f0f 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -1,4 +1,4 @@ -using Documenter, DataFrames, DataArrays +using Documenter, DataFrames # Build documentation. 
# ==================== @@ -6,18 +6,41 @@ using Documenter, DataFrames, DataArrays makedocs( # options modules = [DataFrames], - doctest = false, - clean = false + doctest = true, + clean = false, + sitename = "DataFrames.jl", + format = Documenter.Formats.HTML, + pages = Any[ + "Introduction" => "index.md", + "User Guide" => Any[ + "Getting Started" => "man/getting_started.md", + "IO" => "man/io.md", + "Joins" => "man/joins.md", + "Split-apply-combine" => "man/split_apply_combine.md", + "Reshaping" => "man/reshaping_and_pivoting.md", + "Sorting" => "man/sorting.md", + "Formulas" => "man/formulas.md", + "Pooling" => "man/pooling.md", + ], + "API" => Any[ + "Main types" => "lib/maintypes.md", + "Utilities" => "lib/utilities.md", + "Data manipulation" => "lib/manipulation.md", + ], + "About" => Any[ + "Release Notes" => "NEWS.md", + "License" => "LICENSE.md", + ] + ] ) # Deploy built documentation from Travis. # ======================================= -# Needs to install an additional dep, mkdocs-material, so provide a custom `deps`. 
-custom_deps() = run(`pip install --user pygments mkdocs mkdocs-material`) - deploydocs( # options - deps = custom_deps, - repo = "github.com/JuliaStats/DataFrames.jl.git" + repo = "github.com/JuliaStats/DataFrames.jl.git", + target = "build", + deps = nothing, + make = nothing, ) diff --git a/docs/mkdocs.yml b/docs/mkdocs.yml deleted file mode 100644 index ea5c0f8695..0000000000 --- a/docs/mkdocs.yml +++ /dev/null @@ -1,43 +0,0 @@ - -site_name: DataFrames.jl -site_description: package for working with tabular data in Julia -repo_url: https://github.com/JuliaStats/DataFrames.jl - -theme: material - -extra: - palette: - primary: 'indigo' - accent: 'blue' - -extra_css: - - assets/Documenter.css - -markdown_extensions: - - codehilite - - extra - - tables - - fenced_code - -extra_javascript: - - https://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS_HTML - - assets/mathjaxhelper.js - -docs_dir: 'build' - -pages: -- [index.md, Introduction] -- [man/getting_started.md, User guide, Getting Started] -- [man/io.md, User guide, IO] -- [man/joins.md, User guide, Joins] -- [man/split_apply_combine.md, User guide, Split-apply-combine] -- [man/reshaping_and_pivoting.md, User guide, Reshaping] -- [man/sorting.md, User guide, Sorting] -- [man/formulas.md, User guide, Formulas] -- [man/pooling.md, User guide, Pooling] -- [lib/maintypes.md, API, Main types] -- [lib/utilities.md, API, Utilities] -- [lib/manipulation.md, API, Data manipulation] -- [NEWS.md, About, Release notes] -- [LICENSE.md, About, License] - diff --git a/docs/src/index.md b/docs/src/index.md index 9a08a33548..7943a0597d 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -2,17 +2,20 @@ ## Package Manual - {contents} - Pages = ["man/getting_started.md", "man/io.md", "man/joins.md", "man/split_apply_combine.md", "man/reshaping_and_pivoting.md", "man/sorting.md", "man/formulas.md", "man/pooling.md"] - Depth = 2 +```@contents +Pages = ["man/getting_started.md", "man/io.md", "man/joins.md", 
"man/split_apply_combine.md", "man/reshaping_and_pivoting.md", "man/sorting.md", "man/formulas.md", "man/pooling.md"] +Depth = 2 +``` ## API - {contents} - Pages = ["lib/maintypes.md", "lib/manipulation.md", "lib/utilities.md"] - Depth = 2 +```@contents +Pages = ["lib/maintypes.md", "lib/manipulation.md", "lib/utilities.md"] +Depth = 2 +``` ## Documentation Index - {index} - Pages = ["lib/maintypes.md", "lib/manipulation.md", "lib/utilities.md", "man/io.md"] +```@index +Pages = ["lib/maintypes.md", "lib/manipulation.md", "lib/utilities.md", "man/io.md"] +``` diff --git a/docs/src/lib/maintypes.md b/docs/src/lib/maintypes.md index 8b8f300283..ccc62d530c 100644 --- a/docs/src/lib/maintypes.md +++ b/docs/src/lib/maintypes.md @@ -1,16 +1,16 @@ - {meta} - CurrentModule = DataFrames +```@meta +CurrentModule = DataFrames +``` # Main Types - {index} - Pages = ["maintypes.md"] - -... - - {docs} - AbstractDataFrame - DataFrame - SubDataFrame +```@index +Pages = ["maintypes.md"] +``` +```@docs +AbstractDataFrame +DataFrame +SubDataFrame +``` diff --git a/docs/src/lib/manipulation.md b/docs/src/lib/manipulation.md index dae992c48b..1f9f578d25 100644 --- a/docs/src/lib/manipulation.md +++ b/docs/src/lib/manipulation.md @@ -1,22 +1,25 @@ - - {meta} - CurrentModule = DataFrames +```@meta +CurrentModule = DataFrames +``` # Data Manipulation - - {index} - Pages = ["manipulation.md"] + +```@index +Pages = ["manipulation.md"] +``` ## Joins - {docs} - join - +```@docs +join +``` + ## Reshaping - {docs} - melt - stack - unstack - stackdf - meltdf +```@docs +melt +stack +unstack +stackdf +meltdf +``` diff --git a/docs/src/lib/utilities.md b/docs/src/lib/utilities.md index 6b2d8d2564..23c9c76d65 100644 --- a/docs/src/lib/utilities.md +++ b/docs/src/lib/utilities.md @@ -1,27 +1,25 @@ - - {meta} - CurrentModule = DataFrames +```@meta +CurrentModule = DataFrames +``` # Utilities - {index} - Pages = ["utilities.md"] +```@index +Pages = ["utilities.md"] +``` -... 
- - {docs} - eltypes - head - complete_cases - complete_cases! - describe - dump - names! - nonunique - rename - rename! - tail - unique - unique! - - +```@docs +eltypes +head +complete_cases +complete_cases! +describe +dump +names! +nonunique +rename +rename! +tail +unique +unique! +``` diff --git a/docs/src/man/formulas.md b/docs/src/man/formulas.md index e9203439e8..c283a696a8 100644 --- a/docs/src/man/formulas.md +++ b/docs/src/man/formulas.md @@ -33,7 +33,7 @@ If you would like to specify both main effects and an interaction term at once, mm = ModelMatrix(ModelFrame(Z ~ X*Y, df)) ``` -You can control how categorical variables (e.g., `PooledDataArray` columns) are converted to `ModelMatrix` columns by specifying _contrasts_ when you construct a `ModelFrame`: +You can control how categorical variables (e.g., `CategoricalArray` columns) are converted to `ModelMatrix` columns by specifying _contrasts_ when you construct a `ModelFrame`: ```julia mm = ModelMatrix(ModelFrame(Z ~ X*Y, df, contrasts = Dict(:X => HelmertCoding()))) @@ -47,4 +47,3 @@ contrasts!(mf, X = HelmertCoding()) ``` The construction of model matrices makes it easy to formulate complex statistical models. These are used to good effect by the [GLM Package.](https://github.com/JuliaStats/GLM.jl) - diff --git a/docs/src/man/getting_started.md b/docs/src/man/getting_started.md index 89a2f7e101..b4fc4e1a17 100644 --- a/docs/src/man/getting_started.md +++ b/docs/src/man/getting_started.md @@ -2,75 +2,75 @@ ## Installation -The DataFrames package is available through the Julia package system. Throughout the rest of this tutorial, we will assume that you have installed the DataFrames package and have already typed `using DataArrays, DataFrames` to bring all of the relevant variables into your current namespace. In addition, we will make use of the `RDatasets` package, which provides access to hundreds of classical data sets. +The DataFrames package is available through the Julia package system. 
Throughout the rest of this tutorial, we will assume that you have installed the DataFrames package and have already typed `using NullableArrays, DataFrames` to bring all of the relevant variables into your current namespace. In addition, we will make use of the `RDatasets` package, which provides access to hundreds of classical data sets. -## The `NA` Value +## The `Nullable` Type -To get started, let's examine the `NA` value. Type the following into the REPL: +To get started, let's examine the `Nullable` type. Objects of this type can either hold a value, or represent a missing value (`null`). For example, this is a `Nullable` holding the integer `1`: ```julia -NA +Nullable(1) ``` -One of the essential properties of `NA` is that it poisons other items. To see this, try to add something like `1` to `NA`: - +And this represents a missing value: ```julia -1 + NA +Nullable() ``` -## The `DataArray` Type - -Now that we see that `NA` is working, let's insert one into a `DataArray`. We'll create one now using the `@data` macro: +`Nullable` objects support all standard operators, which return another `Nullable`. One of the essential properties of `null` values is that they poison other items. To see this, try to add something like `Nullable(1)` to `Nullable()`: ```julia -dv = @data([NA, 3, 2, 5, 4]) +Nullable(1) + Nullable() ``` -To see how `NA` poisons even complex calculations, let's try to take the mean of the five numbers stored in `dv`: +Note that operations mixing `Nullable` and scalars (e.g. `1 + Nullable()`) are not supported. + +## The `NullableArray` Type + +`Nullable` objects can be stored in a standard `Array` just like any value: ```julia -mean(dv) +v = Nullable{Int}[1, 3, 4, 5, 4] ``` -In many cases we're willing to just ignore `NA` values and remove them from our vector. We can do that using the `dropna` function: +But arrays of `Nullable` are inefficient, both in terms of computation costs and of memory use. 
`NullableArrays` provide a more efficient storage, and behave like `Array{Nullable}` objects. ```julia -dropna(dv) -mean(dropna(dv)) +nv = NullableArray(Nullable{Int}[Nullable(), 3, 2, 5, 4]) ``` -Instead of removing `NA` values, you can try to conver the `DataArray` into a normal Julia `Array` using `convert`: +In many cases we're willing to just ignore missing values and remove them from our vector. We can do that using the `dropnull` function: ```julia -convert(Array, dv) +dropnull(nv) +mean(dropnull(nv)) ``` -This fails in the presence of `NA` values, but will succeed if there are no `NA` values: +Instead of removing `null` values, you can try to convert the `NullableArray` into a normal Julia `Array` using `convert`: ```julia -dv[1] = 3 -convert(Array, dv) +convert(Array, nv) ``` -In addition to removing `NA` values and hoping they won't occur, you can also replace any `NA` values using the `convert` function, which takes a replacement value as an argument: +This fails in the presence of `null` values, but will succeed if there are no `null` values: ```julia -dv = @data([NA, 3, 2, 5, 4]) -mean(convert(Array, dv, 11)) +nv[1] = 3 +convert(Array, nv) ``` -Which strategy for dealing with `NA` values is most appropriate will typically depend on the specific details of your data analysis pathway. - -Although the examples above employed only 1D `DataArray` objects, the `DataArray` type defines a completely generic N-dimensional array type. 
Operations on generic `DataArray` objects work in higher dimensions in the same way that they work on Julia's Base `Array` type: +In addition to removing `null` values and hoping they won't occur, you can also replace any `null` values using the `convert` function, which takes a replacement value as an argument: ```julia -dm = @data([NA 0.0; 0.0 1.0]) -dm * dm +nv = NullableArray(Nullable{Int}[Nullable(), 3, 2, 5, 4]) +mean(convert(Array, nv, 0)) ``` +Which strategy for dealing with `null` values is most appropriate will typically depend on the specific details of your data analysis pathway. + ## The `DataFrame` Type -The `DataFrame` type can be used to represent data tables, each column of which is a `DataArray`. You can specify the columns using keyword arguments: +The `DataFrame` type can be used to represent data tables, each column of which is an array (by default, a `NullableArray`). You can specify the columns using keyword arguments: ```julia df = DataFrame(A = 1:4, B = ["M", "F", "F", "M"]) @@ -110,22 +110,22 @@ describe(df) To focus our search, we start looking at just the means and medians of specific columns. In the example below, we use numeric indexing to access the columns of the `DataFrame`: ```julia -mean(df[1]) -median(df[1]) +mean(dropnull(df[1])) +median(dropnull(df[1])) ``` We could also have used column names to access individual columns: ```julia -mean(df[:A]) -median(df[:A]) +mean(dropnull(df[:A])) +median(dropnull(df[:A])) ``` We can also apply a function to each column of a `DataFrame` with the `colwise` function. 
For example: ```julia df = DataFrame(A = 1:4, B = randn(4)) -colwise(cumsum, df) +colwise(c->cumsum(dropnull(c)), df) ``` ## Accessing Classic Data Sets @@ -135,10 +135,15 @@ To see more of the functionality for working with `DataFrame` objects, we need a For example, we can access Fisher's iris data set using the following functions: ```julia -using RDatasets -iris = dataset("datasets", "iris") +iris = readtable(joinpath(Pkg.dir("DataFrames"), "test/data/iris.csv")) head(iris) ``` In the next section, we'll discuss generic I/O strategy for reading and writing `DataFrame` objects that you can use to import and export your own data files. +## Querying DataFrames + +While the `DataFrames` package provides basic data manipulation capabilities, users are encouraged to use the following packages for more powerful and complete data querying functionality in the spirit of [dplyr](https://github.com/hadley/dplyr) and [LINQ](https://msdn.microsoft.com/en-us/library/bb397926.aspx): + +- [DataFramesMeta.jl](https://github.com/JuliaStats/DataFramesMeta.jl) provides metaprogramming tools for `DataFrames` and associative objects. These macros improve performance and provide more convenient syntax. +- [Query.jl](https://github.com/davidanthoff/Query.jl) provides a LINQ like interface to a large number of data sources, including `DataFrame` instances. diff --git a/docs/src/man/io.md b/docs/src/man/io.md index fdb0869355..0ebcc94d60 100644 --- a/docs/src/man/io.md +++ b/docs/src/man/io.md @@ -4,8 +4,9 @@ To read data from a CSV-like file, use the `readtable` function: - {docs} - readtable +```@docs +readtable +``` `readtable` requires that you specify the path of the file that you would like to read as a `String`. To read data from a non-file source, you may also supply an `IO` object. It supports many additional keyword arguments: these are documented in the section on advanced I/O operations. 
@@ -13,8 +14,9 @@ To read data from a CSV-like file, use the `readtable` function: To write data to a CSV file, use the `writetable` function: - {docs} - writetable +```@docs +writetable +``` ## Supplying `DataFrame`s inline with non-standard string literals diff --git a/docs/src/man/joins.md b/docs/src/man/joins.md index 558bf317e3..c152ee9fa3 100644 --- a/docs/src/man/joins.md +++ b/docs/src/man/joins.md @@ -15,7 +15,7 @@ full = join(names, jobs, on = :ID) Output: -| Row | ID | Name | Job | +| Row | ID | Name | Job | |-----|----|------------|----------| | 1 | 1 | "John Doe" | "Lawyer" | | 2 | 1 | "Jane Doe" | "Doctor" | diff --git a/docs/src/man/pooling.md b/docs/src/man/pooling.md index 17757d8526..fcffaaba29 100644 --- a/docs/src/man/pooling.md +++ b/docs/src/man/pooling.md @@ -1,44 +1,49 @@ -# Pooling Data (Representing Factors) +# Categorical Data Often, we have to deal with factors that take on a small number of levels: ```julia -dv = @data(["Group A", "Group A", "Group A", - "Group B", "Group B", "Group B"]) +v = ["Group A", "Group A", "Group A", + "Group B", "Group B", "Group B"] ``` -The naive encoding used in a `DataArray` represents every entry of this vector as a full string. In contrast, we can represent the data more efficiently by replacing the strings with indices into a small pool of levels. This is what the `PooledDataArray` does: +The naive encoding used in an `Array` or in a `NullableArray` represents every entry of this vector as a full string. In contrast, we can represent the data more efficiently by replacing the strings with indices into a small pool of levels. 
This is what the `CategoricalArray` type does: ```julia -pdv = @pdata(["Group A", "Group A", "Group A", - "Group B", "Group B", "Group B"]) +cv = CategoricalArray(["Group A", "Group A", "Group A", + "Group B", "Group B", "Group B"]) ``` -In addition to representing repeated data efficiently, the `PooledDataArray` allows us to determine the levels of the factor at any time using the `levels` function: +A companion type, `NullableCategoricalArray`, allows storing missing values in the array: is to `CategoricalArray` what `NullableArray` is to the standard `Array` type. + +In addition to representing repeated data efficiently, the `CategoricalArray` type allows us to determine efficiently the allowed levels of the variable at any time using the `levels` function (note that levels may or may not be actually used in the data): ```julia -levels(pdv) +levels(cv) ``` -By default, a `PooledDataArray` is able to represent 232differents levels. You can use less memory by calling the `compact` function: +The `levels!` function also allows changing the order of appearance of the levels, which can be useful for display purposes or when working with ordered variables. + +By default, a `CategoricalArray` is able to represent 232differents levels. You can use less memory by calling the `compact` function: ```julia -pdv = compact(pdv) +cv = compact(cv) ``` -Often, you will have factors encoded inside a DataFrame with `DataArray` columns instead of `PooledDataArray` columns. You can do conversion of a single column using the `pool` function: +Often, you will have factors encoded inside a DataFrame with `Array` or `NullableArray` columns instead of `CategoricalArray` or `NullableCategoricalArray` columns. 
You can do conversion of a single column using the `categorical` function: ```julia -pdv = pool(dv) +cv = categorical(v) ``` -Or you can edit the columns of a `DataFrame` in-place using the `pool!` function: +Or you can edit the columns of a `DataFrame` in-place using the `categorical!` function: ```julia df = DataFrame(A = [1, 1, 1, 2, 2, 2], B = ["X", "X", "X", "Y", "Y", "Y"]) -pool!(df, [:A, :B]) +categorical!(df, [:A, :B]) ``` -Pooling columns is important for working with the [GLM package](https://github.com/JuliaStats/GLM.jl) When fitting regression models, `PooledDataArray` columns in the input are translated into 0/1 indicator columns in the `ModelMatrix` with one column for each of the levels of the `PooledDataArray`. This allows one to analyze categorical data efficiently. +Using categorical arrays is important for working with the [GLM package](https://github.com/JuliaStats/GLM.jl). When fitting regression models, `CategoricalArray` and `NullableCategoricalArray` columns in the input are translated into 0/1 indicator columns in the `ModelMatrix` with one column for each of the levels of the `CategoricalArray`/`NullableCategoricalArray`. This allows one to analyze categorical data efficiently. +See the [CategoricalArrays package](https://github.com/nalimilan/CategoricalArrays.jl) for more information regarding categorical arrays. 
diff --git a/docs/src/man/reshaping_and_pivoting.md b/docs/src/man/reshaping_and_pivoting.md index 959dd00bfb..dcd02c70d9 100644 --- a/docs/src/man/reshaping_and_pivoting.md +++ b/docs/src/man/reshaping_and_pivoting.md @@ -3,10 +3,10 @@ Reshape data from wide to long format using the `stack` function: ```julia -using DataFrames, RDatasets -iris = dataset("datasets", "iris") +using DataFrames +iris = readtable(joinpath(Pkg.dir("DataFrames"), "test/data/iris.csv")) iris[:id] = 1:size(iris, 1) # this makes it easier to unstack -d = stack(iris, [1:4]) +d = stack(iris, 1:4) ``` The second optional argument to `stack` indicates the columns to be stacked. These are normally referred to as the measured variables. Column names can also be given: @@ -79,6 +79,6 @@ None of these reshaping functions perform any aggregation. To do aggregation, us ```julia d = stack(iris) -x = by(d, [:variable, :Species], df -> DataFrame(vsum = mean(df[:value]))) +x = by(d, [:variable, :Species], df -> DataFrame(vsum = mean(dropnull(df[:value])))) unstack(x, :Species, :vsum) ``` diff --git a/docs/src/man/sorting.md b/docs/src/man/sorting.md index 4d2140a258..14224a8b55 100644 --- a/docs/src/man/sorting.md +++ b/docs/src/man/sorting.md @@ -3,9 +3,8 @@ Sorting is a fundamental component of data analysis. 
Basic sorting is trivial: just calling `sort!` will sort all columns, in place: ```julia -using DataFrames, RDatasets - -iris = dataset("datasets", "iris") +using DataFrames +iris = readtable(joinpath(Pkg.dir("DataFrames"), "test/data/iris.csv")) sort!(iris) ``` diff --git a/docs/src/man/split_apply_combine.md b/docs/src/man/split_apply_combine.md index 912279f3ac..8caa9b1b1e 100644 --- a/docs/src/man/split_apply_combine.md +++ b/docs/src/man/split_apply_combine.md @@ -7,12 +7,11 @@ The DataFrames package supports the Split-Apply-Combine strategy through the `by We show several examples of the `by` function applied to the `iris` dataset below: ```julia -using DataFrames, RDatasets - -iris = dataset("datasets", "iris") +using DataFrames +iris = readtable(joinpath(Pkg.dir("DataFrames"), "test/data/iris.csv")) by(iris, :Species, size) -by(iris, :Species, df -> mean(df[:PetalLength])) +by(iris, :Species, df -> mean(dropnull(df[:PetalLength]))) by(iris, :Species, df -> DataFrame(N = size(df, 1))) ``` @@ -20,7 +19,7 @@ The `by` function also support the `do` block form: ```julia by(iris, :Species) do df - DataFrame(m = mean(df[:PetalLength]), s² = var(df[:PetalLength])) + DataFrame(m = mean(dropnull(df[:PetalLength])), s² = var(dropnull(df[:PetalLength]))) end ``` @@ -30,7 +29,7 @@ We show several examples of the `aggregate` function applied to the `iris` datas ```julia aggregate(iris, :Species, sum) -aggregate(iris, :Species, [sum, mean]) +aggregate(iris, :Species, [sum, x->mean(dropnull(x))]) ``` If you only want to split the data set into subsets, use the `groupby` function: diff --git a/docs/src/man/subsets.md b/docs/src/man/subsets.md index 2049d5cd1a..a1a899a848 100644 --- a/docs/src/man/subsets.md +++ b/docs/src/man/subsets.md @@ -1,59 +1,12 @@ # Subsets -## DataArrays - -The `DataArray` type is meant to behave like a standard Julia `Array` and tries to implement identical indexing rules: - -One dimensional `DataArray`: - -```julia -julia> using DataArrays - 
-julia> dv = data([1, 2, 3]) -3-element DataArray{Int64,1}: - 1 - 2 - 3 - -julia> dv[1] -1 - -julia> dv[2] = NA -NA - -julia> dv[2] -NA -``` - -Two dimensional `DataArray`: - -```julia -julia> using DataArrays - -julia> dm = data([1 2; 3 4]) -2×2 DataArray{Int64,2}: - 1 2 - 3 4 - -julia> dm[1, 1] -1 - -julia> dm[2, 1] = NA -NA - -julia> dm[2, 1] -NA -``` - -DataFrames - -In contrast, a `DataFrame` offers substantially more forms of indexing because columns can be referred to by name: +A `DataFrame` supports many forms of indexing. ```julia julia> using DataFrames julia> df = DataFrame(A = 1:10, B = 2:2:20) -10×2 DataFrame +10×2 DataFrames.DataFrame │ Row │ A │ B │ ├─────┼────┼────┤ │ 1 │ 1 │ 2 │ @@ -68,11 +21,11 @@ julia> df = DataFrame(A = 1:10, B = 2:2:20) │ 10 │ 10 │ 20 │ ``` -Refering to the first column by index or name: +Referring to the first column by index or name: ```julia julia> df[1] -10-element DataArray{Int64,1}: +10-element NullableArrays.NullableArray{Int64,1}: 1 2 3 @@ -85,7 +38,7 @@ julia> df[1] 10 julia> df[:A] -10-element DataArray{Int64,1}: +10-element NullableArrays.NullableArray{Int64,1}: 1 2 3 @@ -102,17 +55,17 @@ Refering to the first element of the first column: ```julia julia> df[1, 1] -1 +Nullable{Int64}(1) julia> df[1, :A] -1 +Nullable{Int64}(1) ``` Selecting a subset of rows by index and an (ordered) subset of columns by name: ```julia julia> df[1:3, [:A, :B]] -3×2 DataFrame +3×2 DataFrames.DataFrame │ Row │ A │ B │ ├─────┼───┼───┤ │ 1 │ 1 │ 2 │ @@ -120,39 +73,10 @@ julia> df[1:3, [:A, :B]] │ 3 │ 3 │ 6 │ julia> df[1:3, [:B, :A]] -3×2 DataFrame +3×2 DataFrames.DataFrame │ Row │ B │ A │ ├─────┼───┼───┤ │ 1 │ 2 │ 1 │ │ 2 │ 4 │ 2 │ │ 3 │ 6 │ 3 │ ``` - -Selecting a subset of rows by using a condition: - -```julia -julia> df[df[:A] % 2 .== 0, :] -5×2 DataFrame -│ Row │ A │ B │ -├─────┼────┼────┤ -│ 1 │ 2 │ 4 │ -│ 2 │ 4 │ 8 │ -│ 3 │ 6 │ 12 │ -│ 4 │ 8 │ 16 │ -│ 5 │ 10 │ 20 │ - -julia> df[df[:B] % 2 .== 0, :] -10×2 DataFrame -│ Row │ A │ B │ 
-├─────┼────┼────┤ -│ 1 │ 1 │ 2 │ -│ 2 │ 2 │ 4 │ -│ 3 │ 3 │ 6 │ -│ 4 │ 4 │ 8 │ -│ 5 │ 5 │ 10 │ -│ 6 │ 6 │ 12 │ -│ 7 │ 7 │ 14 │ -│ 8 │ 8 │ 16 │ -│ 9 │ 9 │ 18 │ -│ 10 │ 10 │ 20 │ -``` diff --git a/src/DataFrames.jl b/src/DataFrames.jl index 3c2a53003f..e19750286c 100644 --- a/src/DataFrames.jl +++ b/src/DataFrames.jl @@ -12,7 +12,8 @@ using Compat import Compat.String using Reexport @reexport using StatsBase -@reexport using DataArrays +@reexport using NullableArrays +@reexport using CategoricalArrays using GZip using SortingAlgorithms @@ -50,6 +51,7 @@ export @~, aggregate, by, + categorical!, coefnames, colwise, combine, @@ -70,8 +72,6 @@ export @~, nrow, nullable!, order, - pool, - pool!, printtable, readtable, rename!, @@ -82,9 +82,14 @@ export @~, unique!, unstack, writetable, + head, + tail, # Remove after deprecation period - read_rda + read_rda, + pool, + pool! + ############################################################################## ## diff --git a/src/abstractdataframe/abstractdataframe.jl b/src/abstractdataframe/abstractdataframe.jl index 3df74edcf7..6e7f9adedf 100644 --- a/src/abstractdataframe/abstractdataframe.jl +++ b/src/abstractdataframe/abstractdataframe.jl @@ -11,24 +11,24 @@ type in that it allows indexing by a key (the columns). 
The following are normally implemented for AbstractDataFrames: -* [`describe`]({ref}) : summarize columns -* [`dump`]({ref}) : show structure +* [`describe`](@ref) : summarize columns +* [`dump`](@ref) : show structure * `hcat` : horizontal concatenation * `vcat` : vertical concatenation * `names` : columns names -* [`names!`]({ref}) : set columns names -* [`rename!`]({ref}) : rename columns names based on keyword arguments -* [`eltypes`]({ref}) : `eltype` of each column +* [`names!`](@ref) : set columns names +* [`rename!`](@ref) : rename columns names based on keyword arguments +* [`eltypes`](@ref) : `eltype` of each column * `length` : number of columns * `size` : (nrows, ncols) -* [`head`]({ref}) : first `n` rows -* [`tail`]({ref}) : last `n` rows +* [`head`](@ref) : first `n` rows +* [`tail`](@ref) : last `n` rows * `convert` : convert to an array -* `DataArray` : convert to a DataArray -* [`complete_cases`]({ref}) : indexes of complete cases (rows with no NA's) -* [`complete_cases!`]({ref}) : remove rows with NA's -* [`nonunique`]({ref}) : indexes of duplicate rows -* [`unique!`]({ref}) : remove duplicate rows +* `NullableArray` : convert to a NullableArray +* [`complete_cases`](@ref) : indexes of complete cases (rows with no NA's) +* [`complete_cases!`](@ref) : remove rows with NA's +* [`nonunique`](@ref) : indexes of duplicate rows +* [`unique!`](@ref) : remove duplicate rows * `similar` : a DataFrame with similar columns as `d` **Indexing** @@ -79,13 +79,17 @@ abstract AbstractDataFrame ## ############################################################################## -immutable Cols{T <: AbstractDataFrame} +immutable Cols{T <: AbstractDataFrame} <: AbstractVector{Any} df::T end Base.start(::Cols) = 1 Base.done(itr::Cols, st) = st > length(itr.df) Base.next(itr::Cols, st) = (itr.df[st], st + 1) Base.length(itr::Cols) = length(itr.df) +Base.size(itr::Cols, ix) = ix==1 ? 
length(itr) : throw(ArgumentError("Incorrect dimension")) +Base.size(itr::Cols) = (length(itr.df),) +Base.linearindexing{T}(::Type{Cols{T}}) = Base.LinearFast() +Base.getindex(itr::Cols, inds...) = getindex(itr.df, inds...) # N.B. where stored as a vector, 'columns(x) = x.vector' is a bit cheaper columns{T <: AbstractDataFrame}(df::T) = Cols{T}(df) @@ -175,7 +179,7 @@ rename!(df, @compat(Dict(:i=>:A, :x=>:X))) (rename!, rename) """ -Column elemental types +Return element types of columns ```julia eltypes(df::AbstractDataFrame) @@ -187,7 +191,7 @@ eltypes(df::AbstractDataFrame) **Result** -* `::Vector{Type}` : the elemental type of each column +* `::Vector{Type}` : the element type of each column **Examples** @@ -197,14 +201,7 @@ eltypes(df) ``` """ -function eltypes(df::AbstractDataFrame) - ncols = size(df, 2) - res = Array(Type, ncols) - for j in 1:ncols - res[j] = eltype(df[j]) - end - return res -end +eltypes(df::AbstractDataFrame) = map!(eltype, Vector{Type}(size(df,2)), columns(df)) Base.size(df::AbstractDataFrame) = (nrow(df), ncol(df)) function Base.size(df::AbstractDataFrame, i::Integer) @@ -213,7 +210,7 @@ function Base.size(df::AbstractDataFrame, i::Integer) elseif i == 2 ncol(df) else - throw(ArgumentError("DataFrames have only two dimensions")) + throw(ArgumentError("DataFrames only have two dimensions")) end end @@ -231,21 +228,15 @@ Base.ndims(::AbstractDataFrame) = 2 Base.similar(df::AbstractDataFrame, dims::Int) = DataFrame(Any[similar(x, dims) for x in columns(df)], copy(index(df))) -nas{T}(dv::AbstractArray{T}, dims::@compat(Union{Int, Tuple{Vararg{Int}}})) = # TODO move to datavector.jl? 
- DataArray(Array(T, dims), trues(dims)) - -nas{T,R}(dv::PooledDataArray{T,R}, dims::@compat(Union{Int, Tuple{Vararg{Int}}})) = - PooledDataArray(DataArrays.RefArray(zeros(R, dims)), dv.pool) - -nas(df::AbstractDataFrame, dims::Int) = - DataFrame(Any[nas(x, dims) for x in columns(df)], copy(index(df))) - ############################################################################## ## ## Equality ## ############################################################################## +# Imported in DataFrames.jl for compatibility across Julia 0.4 and 0.5 +@compat(Base.:(==))(df1::AbstractDataFrame, df2::AbstractDataFrame) = isequal(df1, df2) + function Base.isequal(df1::AbstractDataFrame, df2::AbstractDataFrame) size(df1, 2) == size(df2, 2) || return false isequal(index(df1), index(df2)) || return false @@ -255,20 +246,6 @@ function Base.isequal(df1::AbstractDataFrame, df2::AbstractDataFrame) return true end -# Imported in DataFrames.jl for compatibility across Julia 0.4 and 0.5 -function (==)(df1::AbstractDataFrame, df2::AbstractDataFrame) - size(df1, 2) == size(df2, 2) || return false - isequal(index(df1), index(df2)) || return false - eq = true - for idx in 1:size(df1, 2) - coleq = df1[idx] == df2[idx] - # coleq could be NA - !isequal(coleq, false) || return false - eq &= coleq - end - return eq -end - ############################################################################## ## ## Associative methods @@ -285,10 +262,10 @@ Base.isempty(df::AbstractDataFrame) = ncol(df) == 0 ## ############################################################################## -DataArrays.head(df::AbstractDataFrame, r::Int) = df[1:min(r,nrow(df)), :] -DataArrays.head(df::AbstractDataFrame) = head(df, 6) -DataArrays.tail(df::AbstractDataFrame, r::Int) = df[max(1,nrow(df)-r+1):nrow(df), :] -DataArrays.tail(df::AbstractDataFrame) = tail(df, 6) +head(df::AbstractDataFrame, r::Int) = df[1:min(r,nrow(df)), :] +head(df::AbstractDataFrame) = head(df, 6) +tail(df::AbstractDataFrame, r::Int) = 
df[max(1,nrow(df)-r+1):nrow(df), :] +tail(df::AbstractDataFrame) = tail(df, 6) """ Show the first or last part of an AbstractDataFrame @@ -341,7 +318,7 @@ dump(io::IO, df::AbstractDataFrame, n::Int = 5) ```julia df = DataFrame(i = 1:10, x = rand(10), y = rand(["a", "b", "c"], 10)) -str(df) +dump(df) ``` """ @@ -355,10 +332,6 @@ function Base.dump(io::IO, df::AbstractDataFrame, n::Int, indent) end end -function Base.dump(io::IO, dv::AbstractDataVector, n::Int, indent) - println(io, typeof(dv), "(", length(dv), ") ", dv[1:min(4, end)]) -end - # summarize the columns of a DF # TODO: clever layout in rows """ @@ -404,32 +377,33 @@ function StatsBase.describe(io, df::AbstractDataFrame) println(io, ) end end -StatsBase.describe(dv::AbstractArray) = describe(STDOUT, dv) -function StatsBase.describe{T<:Number}(io, dv::AbstractArray{T}) - if all(isna(dv)) +StatsBase.describe(nv::AbstractArray) = describe(STDOUT, nv) +function StatsBase.describe{T<:Number}(io, nv::AbstractArray{T}) + if all(_isnull, nv) println(io, " * All NA * ") return end - filtered = float(dropna(dv)) + filtered = float(dropnull(nv)) qs = quantile(filtered, [0, .25, .5, .75, 1]) statNames = ["Min", "1st Qu.", "Median", "Mean", "3rd Qu.", "Max"] statVals = [qs[1:3]; mean(filtered); qs[4:5]] for i = 1:6 - println(io, string(rpad(statNames[i], 8, " "), " ", string(statVals[i]))) + println(io, string(rpad(statNames[i], 10, " "), " ", string(statVals[i]))) end - nas = sum(isna(dv)) - println(io, "NAs $nas") - println(io, "NA% $(round(nas*100/length(dv), 2))%") + nulls = countnull(nv) + println(io, "NULLs $(nulls)") + println(io, "NULL % $(round(nulls*100/length(nv), 2))%") return end -function StatsBase.describe{T}(io, dv::AbstractArray{T}) - ispooled = isa(dv, PooledDataVector) ? "Pooled " : "" +function StatsBase.describe{T}(io, nv::AbstractArray{T}) + ispooled = isa(nv, CategoricalVector) ? 
"Pooled " : "" + nulls = countnull(nv) # if nothing else, just give the length and element type and NA count - println(io, "Length $(length(dv))") - println(io, "Type $(ispooled)$(string(eltype(dv)))") - println(io, "NAs $(sum(isna(dv)))") - println(io, "NA% $(round(sum(isna(dv))*100/length(dv), 2))%") - println(io, "Unique $(length(unique(dv)))") + println(io, "Length $(length(nv))") + println(io, "Type $(ispooled)$(string(eltype(nv)))") + println(io, "NULLs $(nulls)") + println(io, "NULL % $(round(nulls*100/length(nv), 2))%") + println(io, "Unique $(length(unique(nv)))") return end @@ -439,8 +413,27 @@ end ## ############################################################################## +function _nonnull!(res, col) + for (i, el) in enumerate(col) + res[i] &= !_isnull(el) + end +end + +function _nonnull!(res, col::NullableArray) + for (i, el) in enumerate(col.isnull) + res[i] &= !el + end +end + +function _nonnull!(res, col::NullableCategoricalArray) + for (i, el) in enumerate(col.refs) + res[i] &= el > 0 + end +end + + """ -Indexes of complete cases (rows without NA's) +Indexes of complete cases (rows without null values) ```julia complete_cases(df::AbstractDataFrame) @@ -454,29 +447,28 @@ complete_cases(df::AbstractDataFrame) * `::Vector{Bool}` : indexes of complete cases -See also [`complete_cases!`]({ref}). +See also [`complete_cases!`](@ref). **Examples** ```julia df = DataFrame(i = 1:10, x = rand(10), y = rand(["a", "b", "c"], 10)) -df[[1,4,5], :x] = NA -df[[9,10], :y] = NA +df[[1,4,5], :x] = Nullable() +df[[9,10], :y] = Nullable() complete_cases(df) ``` """ function complete_cases(df::AbstractDataFrame) - ## Returns a Vector{Bool} of indexes of complete cases (rows with no NA's). - res = !isna(df[1]) - for i in 2:ncol(df) - res &= !isna(df[i]) + res = fill(true, size(df, 1)) + for i in 1:size(df, 2) + _nonnull!(res, df[i]) end res end """ -Delete rows with NA's. +Delete rows with null values. 
```julia complete_cases!(df::AbstractDataFrame) @@ -490,14 +482,14 @@ complete_cases!(df::AbstractDataFrame) * `::AbstractDataFrame` : the updated version -See also [`complete_cases`]({ref}). +See also [`complete_cases`](@ref). **Examples** ```julia df = DataFrame(i = 1:10, x = rand(10), y = rand(["a", "b", "c"], 10)) -df[[1,4,5], :x] = NA -df[[9,10], :y] = NA +df[[1,4,5], :x] = Nullable() +df[[9,10], :y] = Nullable() complete_cases!(df) ``` @@ -508,7 +500,8 @@ function Base.convert(::Type{Array}, df::AbstractDataFrame) convert(Matrix, df) end function Base.convert(::Type{Matrix}, df::AbstractDataFrame) - T = reduce(typejoin, eltypes(df)) + T = reduce(promote_type, eltypes(df)) + T <: Nullable && (T = eltype(T)) convert(Matrix{T}, df) end function Base.convert{T}(::Type{Array{T}}, df::AbstractDataFrame) @@ -518,27 +511,28 @@ function Base.convert{T}(::Type{Matrix{T}}, df::AbstractDataFrame) n, p = size(df) res = Array(T, n, p) idx = 1 - for col in columns(df) - anyna(col) && error("DataFrame contains NAs") - copy!(res, idx, data(col)) + for (name, col) in zip(names(df), columns(df)) + anynull(col) && error("cannot convert a DataFrame containing null values to array (found for column $name)") + copy!(res, idx, convert(Vector{T}, col)) idx += n end return res end -function Base.convert(::Type{DataArray}, df::AbstractDataFrame) - convert(DataMatrix, df) +function Base.convert(::Type{NullableArray}, df::AbstractDataFrame) + convert(NullableMatrix, df) end -function Base.convert(::Type{DataMatrix}, df::AbstractDataFrame) - T = reduce(typejoin, eltypes(df)) - convert(DataMatrix{T}, df) +function Base.convert(::Type{NullableMatrix}, df::AbstractDataFrame) + T = reduce(promote_type, eltypes(df)) + T <: Nullable && (T = eltype(T)) + convert(NullableMatrix{T}, df) end -function Base.convert{T}(::Type{DataArray{T}}, df::AbstractDataFrame) - convert(DataMatrix{T}, df) +function Base.convert{T}(::Type{NullableArray{T}}, df::AbstractDataFrame) + convert(NullableMatrix{T}, df) 
end -function Base.convert{T}(::Type{DataMatrix{T}}, df::AbstractDataFrame) +function Base.convert{T}(::Type{NullableMatrix{T}}, df::AbstractDataFrame) n, p = size(df) - res = DataArray(T, n, p) + res = NullableArray(T, n, p) idx = 1 for col in columns(df) copy!(res, idx, col) @@ -548,7 +542,7 @@ function Base.convert{T}(::Type{DataMatrix{T}}, df::AbstractDataFrame) end """ -Indexes of complete cases (rows without NA's) +Indexes of duplicate rows (a row that is a duplicate of a prior row) ```julia nonunique(df::AbstractDataFrame) @@ -565,7 +559,7 @@ nonunique(df::AbstractDataFrame, cols) * `::Vector{Bool}` : indicates whether the row is a duplicate of some prior row -See also [`unique`]({ref}) and [`unique!`]({ref}). +See also [`unique`](@ref) and [`unique!`](@ref). **Examples** @@ -623,7 +617,7 @@ specifying the column(s) to compare. When `cols` is specified, the return DataFrame contains complete rows, retaining in each case the first instance for which `df[cols]` is unique. -See also [`nonunique`]({ref}). +See also [`nonunique`](@ref). **Examples** @@ -641,7 +635,7 @@ unique!(df) # modifies df function nonuniquekey(df::AbstractDataFrame) # Here's another (probably a lot faster) way to do `nonunique` # by grouping on all columns. It will fail if columns cannot be - # made into PooledDataVector's. + # made into CategoricalVector's. gd = groupby(df, _names(df)) idx = [1:length(gd.idx)][gd.idx][gd.starts] res = fill(true, nrow(df)) @@ -654,7 +648,7 @@ function colmissing(df::AbstractDataFrame) # -> Vector{Int} nrows, ncols = size(df) missing = zeros(Int, ncols) for j in 1:ncols - missing[j] = countna(df[j]) + missing[j] = countnull(df[j]) end return missing end @@ -673,7 +667,7 @@ without(df::AbstractDataFrame, c::Any) = without(df, index(df)[c]) ############################################################################## # hcat's first argument must be an AbstractDataFrame -# Trailing arguments (currently) may also be DataVectors, Vectors, or scalars. 
+# Trailing arguments (currently) may also be NullableVectors, Vectors, or scalars. # hcat! is defined in dataframes/dataframes.jl # Its first argument (currently) must be a DataFrame. @@ -684,84 +678,63 @@ Base.hcat(df::AbstractDataFrame, x) = hcat!(df[:, :], x) Base.hcat(df::AbstractDataFrame, x, y...) = hcat!(hcat(df, x), y...) # vcat only accepts DataFrames. Finds union of columns, maintaining order -# of first df. Missing data becomes NAs. +# of first df. Missing data become null values. Base.vcat(df::AbstractDataFrame) = df Base.vcat(dfs::AbstractDataFrame...) = vcat(AbstractDataFrame[dfs...]) Base.vcat(dfs::Vector{Void}) = dfs + +_isnullable{A<:AbstractArray}(::Type{A}) = eltype(A) <: Nullable + function Base.vcat{T<:AbstractDataFrame}(dfs::Vector{T}) isempty(dfs) && return DataFrame() - coltyps, colnams, similars = _colinfo(dfs) - res = DataFrame() - Nrow = sum(nrow, dfs) - for j in 1:length(colnams) - colnam = colnams[j] - col = similar(similars[j], coltyps[j], Nrow) - - i = 1 - for df in dfs - if haskey(df, colnam) && eltype(df[colnam]) != NAtype - copy!(col, i, df[colnam]) - end - i += size(df, 1) + nrows = sum(nrow, dfs) + for colnam in unique(Base.flatten(names.(dfs))) + k = Bool[haskey(df, colnam) for df in dfs] + if all(k) + res[colnam] = vcat((dfs[i][colnam] for i in 1:length(dfs))...) + continue end - res[colnam] = col - end - res -end + c = ((typeof(dfs[i][colnam]) for i in 1:length(dfs) if k[i])...) 
+ C = Base.return_types(vcat, c) + + if length(C)==1 && isleaftype(C[1]) + if _isnullable(C[1]) + NC = C[1] + else + NC = NullableArray{eltype(C[1])} + end -_isnullable(::AbstractArray) = false -_isnullable(::AbstractDataArray) = true -const EMPTY_DATA = DataArray(Void, 0) - -function _colinfo{T<:AbstractDataFrame}(dfs::Vector{T}) - df1 = dfs[1] - colindex = copy(index(df1)) - coltyps = eltypes(df1) - similars = collect(columns(df1)) - nonnull_ct = Int[_isnullable(c) for c in columns(df1)] - - for i in 2:length(dfs) - df = dfs[i] - for j in 1:size(df, 2) - col = df[j] - cn, ct = _names(df)[j], eltype(col) - if haskey(colindex, cn) - idx = colindex[cn] - - oldtyp = coltyps[idx] - if !(ct <: oldtyp) - coltyps[idx] = promote_type(oldtyp, ct) + col = NC(nrows) + j = 1 + for i in 1:length(dfs) + if k[i] + copy!(col, j, dfs[i][colnam]) end - nonnull_ct[idx] += !_isnullable(col) - else # new column - push!(colindex, cn) - push!(coltyps, ct) - push!(similars, col) - push!(nonnull_ct, !_isnullable(col)) + j += nrow(dfs[i]) end - end - end + else + # warn("Unstable return types: ", C, " from vcat of ", [typeof(dfs[i][colnam]) for i in 1:length(dfs) if k[i]]) - for j in 1:length(colindex) - if nonnull_ct[j] < length(dfs) && !_isnullable(similars[j]) - similars[j] = EMPTY_DATA + E = Base.promote_eltype(c...) + TN = NullableArray{E <: Nullable ? eltype(E) : E} + col = vcat((k[i] ? dfs[i][colnam] : TN(nrow(dfs[i])) for i in 1:length(dfs))...) end - end - colnams = _names(colindex) - coltyps, colnams, similars + res[colnam] = col + end + res end ############################################################################## ## ## Hashing ## -## Make sure this agrees with is_equals() +## Make sure this agrees with isequals() ## ############################################################################## @@ -792,7 +765,7 @@ ncol(df::AbstractDataFrame) * `::AbstractDataFrame` : the updated version -See also [`size`]({ref}). +See also [`size`](@ref). 
NOTE: these functions may be depreciated for `size`. diff --git a/src/abstractdataframe/io.jl b/src/abstractdataframe/io.jl index debd979db3..fc1f9ca418 100644 --- a/src/abstractdataframe/io.jl +++ b/src/abstractdataframe/io.jl @@ -23,7 +23,7 @@ function printtable(io::IO, header::Bool = true, separator::Char = ',', quotemark::Char = '"', - nastring::AbstractString = "NA") + nastring::AbstractString = "NULL") n, p = size(df) etypes = eltypes(df) if header @@ -42,10 +42,10 @@ function printtable(io::IO, quotestr = string(quotemark) for i in 1:n for j in 1:p - if ! (isna(df[j],i)) + if !isnull(df[j],i) if ! (etypes[j] <: Real) print(io, quotemark) - escapedprint(io, df[i, j], quotestr) + escapedprint(io, get(df[i, j]), quotestr) print(io, quotemark) else print(io, df[i, j]) @@ -67,7 +67,7 @@ function printtable(df::AbstractDataFrame; header::Bool = true, separator::Char = ',', quotemark::Char = '"', - nastring::AbstractString = "NA") + nastring::AbstractString = "NULL") printtable(STDOUT, df, header = header, @@ -94,7 +94,7 @@ writetable(filename, df, [keyword options]) * `separator::Char` -- The separator character that you would like to use. Defaults to the output of `getseparator(filename)`, which uses commas for files that end in `.csv`, tabs for files that end in `.tsv` and a single space for files that end in `.wsv`. * `quotemark::Char` -- The character used to delimit string fields. Defaults to `'"'`. * `header::Bool` -- Should the file contain a header that specifies the column names from `df`. Defaults to `true`. -* `nastring::AbstractString` -- What to write in place of missing data. Defaults to `"NA"`. +* `nastring::AbstractString` -- What to write in place of missing data. Defaults to `"NULL"`. 
### Result @@ -115,7 +115,7 @@ function writetable(filename::AbstractString, header::Bool = true, separator::Char = getseparator(filename), quotemark::Char = '"', - nastring::AbstractString = "NA", + nastring::AbstractString = "NULL", append::Bool = false) if endswith(filename, ".bz") || endswith(filename, ".bz2") @@ -169,7 +169,6 @@ function html_escape(cell::AbstractString) end @compat function Base.show(io::IO, ::MIME"text/html", df::AbstractDataFrame) - n = size(df, 1) cnames = _names(df) write(io, "") write(io, "") @@ -178,13 +177,19 @@ end write(io, "") end write(io, "") - tty_rows, tty_cols = _displaysize(io) - mxrow = min(n,tty_rows) + haslimit = get(io, :limit, true) + n = size(df, 1) + if haslimit + tty_rows, tty_cols = _displaysize(io) + mxrow = min(n,tty_rows) + else + mxrow = n + end for row in 1:mxrow write(io, "") write(io, "") for column_name in cnames - cell = string(df[row, column_name]) + cell = sprint(ourshowcompact, df[row, column_name]) write(io, "") end write(io, "") @@ -200,6 +205,60 @@ end write(io, "
$column_name
$row$(html_escape(cell))
") end +############################################################################## +# +# LaTeX output +# +############################################################################## + +function latex_char_escape(char::AbstractString) + if char == "\\" + return "\\textbackslash{}" + elseif char == "~" + return "\\textasciitilde{}" + else + return string("\\", char) + end +end + +function latex_escape(cell::AbstractString) + cell = replace(cell, ['\\','~','#','$','%','&','_','^','{','}'], latex_char_escape) + return cell +end + +function Base.show(io::IO, ::MIME"text/latex", df::AbstractDataFrame) + nrows = size(df, 1) + ncols = size(df, 2) + cnames = _names(df) + alignment = repeat("c", ncols) + write(io, "\\begin{tabular}{r|") + write(io, alignment) + write(io, "}\n") + write(io, "\t& ") + header = join(map(c -> latex_escape(string(c)), cnames), " & ") + write(io, header) + write(io, "\\\\\n") + write(io, "\t\\hline\n") + for row in 1:nrows + write(io, "\t") + write(io, @sprintf("%d", row)) + for col in 1:ncols + write(io, " & ") + cell = df[row,col] + if !isnull(cell) + content = get(cell) + if mimewritable(MIME("text/latex"), content) + show(io, MIME("text/latex"), content) + else + print(io, latex_escape(string(content))) + end + end + end + write(io, " \\\\\n") + end + write(io, "\\end{tabular}\n") +end + ############################################################################## # # MIME diff --git a/src/abstractdataframe/join.jl b/src/abstractdataframe/join.jl index c8f7e8c7d9..c57f8e344b 100644 --- a/src/abstractdataframe/join.jl +++ b/src/abstractdataframe/join.jl @@ -2,13 +2,26 @@ ## Join / merge ## +# Like similar, but returns a nullable array +similar_nullable{T}(dv::AbstractArray{T}, dims::@compat(Union{Int, Tuple{Vararg{Int}}})) = + NullableArray(T, dims) + +similar_nullable{T<:Nullable}(dv::AbstractArray{T}, dims::@compat(Union{Int, Tuple{Vararg{Int}}})) = + NullableArray(eltype(T), dims) + +similar_nullable{T,R}(dv::CategoricalArray{T,R}, 
dims::@compat(Union{Int, Tuple{Vararg{Int}}})) = + NullableCategoricalArray(T, dims) + +similar_nullable(df::AbstractDataFrame, dims::Int) = + DataFrame(Any[similar_nullable(x, dims) for x in columns(df)], copy(index(df))) + function join_idx(left, right, max_groups) ## adapted from Wes McKinney's full_outer_join in pandas (file: src/join.pyx). - # NA group in location 0 + # NULL group in location 0 - left_sorter, where, left_count = DataArrays.groupsort_indexer(left, max_groups) - right_sorter, where, right_count = DataArrays.groupsort_indexer(right, max_groups) + left_sorter, where, left_count = groupsort_indexer(left, max_groups) + right_sorter, where, right_count = groupsort_indexer(right, max_groups) # First pass, determine size of result set tcount = 0 @@ -27,7 +40,7 @@ function join_idx(left, right, max_groups) end end - # group 0 is the NA group + # group 0 is the NULL group tposition = 0 lposition = 0 rposition = 0 @@ -72,66 +85,134 @@ function join_idx(left, right, max_groups) right_sorter[right_indexer], right_sorter[rightonly_indexer]) end -function DataArrays.PooledDataVecs(df1::AbstractDataFrame, - df2::AbstractDataFrame) +function sharepools{S,N}(v1::Union{CategoricalArray{S,N}, NullableCategoricalArray{S,N}}, + v2::Union{CategoricalArray{S,N}, NullableCategoricalArray{S,N}}, + index::Vector{S}, + R) + tidx1 = convert(Vector{R}, indexin(CategoricalArrays.index(v1.pool), index)) + tidx2 = convert(Vector{R}, indexin(CategoricalArrays.index(v2.pool), index)) + refs1 = zeros(R, length(v1)) + refs2 = zeros(R, length(v2)) + for i in 1:length(refs1) + if v1.refs[i] != 0 + refs1[i] = tidx1[v1.refs[i]] + end + end + for i in 1:length(refs2) + if v2.refs[i] != 0 + refs2[i] = tidx2[v2.refs[i]] + end + end + pool = CategoricalPool{S, R}(index) + return (CategoricalArray(refs1, pool), + CategoricalArray(refs2, pool)) +end + +function sharepools{S,N}(v1::Union{CategoricalArray{S,N}, NullableCategoricalArray{S,N}}, + v2::Union{CategoricalArray{S,N}, 
NullableCategoricalArray{S,N}}) + index = sort(unique([levels(v1); levels(v2)])) + sz = length(index) + + R = sz <= typemax(UInt8) ? UInt8 : + sz <= typemax(UInt16) ? UInt16 : + sz <= typemax(UInt32) ? UInt32 : + UInt64 + + # To ensure type stability during actual work + sharepools(v1, v2, index, R) +end + +sharepools{S,N}(v1::Union{CategoricalArray{S,N}, NullableCategoricalArray{S,N}}, + v2::AbstractArray{S,N}) = + sharepools(v1, oftype(v1, v2)) + +sharepools{S,N}(v1::AbstractArray{S,N}, + v2::Union{CategoricalArray{S,N}, NullableCategoricalArray{S,N}}) = + sharepools(oftype(v2, v1), v2) + +# TODO: write an optimized version for (Nullable)CategoricalArray +function sharepools(v1::AbstractArray, + v2::AbstractArray) + ## Return two categorical arrays that share the same pool. + + ## TODO: allow specification of R + R = CategoricalArrays.DefaultRefType + refs1 = Array(R, size(v1)) + refs2 = Array(R, size(v2)) + poolref = Dict{promote_type(eltype(v1), eltype(v2)), R}() + maxref = 0 + + # loop through once to fill the poolref dict + for i = 1:length(v1) + if !_isnull(v1[i]) + poolref[v1[i]] = 0 + end + end + for i = 1:length(v2) + if !_isnull(v2[i]) + poolref[v2[i]] = 0 + end + end + + # fill positions in poolref + pool = sort(collect(keys(poolref))) + i = 1 + for p in pool + poolref[p] = i + i += 1 + end + + # fill in newrefs + zeroval = zero(R) + for i = 1:length(v1) + if _isnull(v1[i]) + refs1[i] = zeroval + else + refs1[i] = poolref[v1[i]] + end + end + for i = 1:length(v2) + if _isnull(v2[i]) + refs2[i] = zeroval + else + refs2[i] = poolref[v2[i]] + end + end + + pool = CategoricalPool(pool) + return (NullableCategoricalArray(refs1, pool), + NullableCategoricalArray(refs2, pool)) +end + +function sharepools(df1::AbstractDataFrame, df2::AbstractDataFrame) # This method exists to allow merge to work with multiple columns. 
- # It takes the columns of each DataFrame and returns a DataArray + # It takes the columns of each DataFrame and returns a categorical array # with a merged pool that "keys" the combination of column values. # The pools of the result don't really mean anything. - dv1, dv2 = PooledDataVecs(df1[1], df2[1]) - # use UInt32 instead of the minimum integer size chosen by PooledDataVecs + dv1, dv2 = sharepools(df1[1], df2[1]) + # use UInt32 instead of the minimum integer size chosen by sharepools # since the number of levels can be high refs1 = Vector{UInt32}(dv1.refs) refs2 = Vector{UInt32}(dv2.refs) - # the + 1 handles NA's + # the + 1 handles nulls refs1[:] += 1 refs2[:] += 1 - ngroups = length(dv1.pool) + 1 + ngroups = length(levels(dv1)) + 1 for j = 2:ncol(df1) - dv1, dv2 = PooledDataVecs(df1[j], df2[j]) + dv1, dv2 = sharepools(df1[j], df2[j]) for i = 1:length(refs1) refs1[i] += (dv1.refs[i]) * ngroups end for i = 1:length(refs2) refs2[i] += (dv2.refs[i]) * ngroups end - ngroups *= (length(dv1.pool) + 1) + ngroups *= length(levels(dv1)) + 1 end # recode refs1 and refs2 to drop the unused column combinations and # limit the pool size - PooledDataVecs( refs1, refs2 ) + sharepools(refs1, refs2) end -function DataArrays.PooledDataArray{R}(df::AbstractDataFrame, ::Type{R}) - # This method exists to allow another way for merge to work with - # multiple columns. It takes the columns of the DataFrame and - # returns a DataArray with a merged pool that "keys" the - # combination of column values. - # Notes: - # - I skipped the sort to make it faster. - # - Converting each individual one-row DataFrame to a Tuple - # might be faster. 
- refs = zeros(R, nrow(df)) - poolref = Dict{AbstractDataFrame, Int}() - pool = Array(UInt64, 0) - j = 1 - for i = 1:nrow(df) - val = df[i,:] - if haskey(poolref, val) - refs[i] = poolref[val] - else - push!(pool, hash(val)) - refs[i] = j - poolref[val] = j - j += 1 - end - end - return PooledDataArray(DataArrays.RefArray(refs), pool) -end - -DataArrays.PooledDataArray(df::AbstractDataFrame) = PooledDataArray(df, DEFAULT_POOLED_REF_TYPE) - - """ Join two DataFrames @@ -164,11 +245,11 @@ join(df1::AbstractDataFrame, - `:cross` : a full Cartesian product of the key combinations; every row of `df1` is matched with every row of `df2` -`NA`s are filled in where needed to complete joins. +Null values are filled in where needed to complete joins. ### Result -* `::DataFrame` : the joined DataFrame +* `::DataFrame` : the joined DataFrame ### Examples @@ -199,7 +280,7 @@ function Base.join(df1::AbstractDataFrame, throw(ArgumentError("Missing join argument 'on'.")) end - dv1, dv2 = PooledDataVecs(df1[on], df2[on]) + dv1, dv2 = sharepools(df1[on], df2[on]) left_idx, leftonly_idx, right_idx, rightonly_idx = join_idx(dv1.refs, dv2.refs, length(dv1.pool)) @@ -216,14 +297,14 @@ function Base.join(df1::AbstractDataFrame, left = df1[[left_idx; leftonly_idx], :] right = vcat(df2w[right_idx, :], - nas(df2w, length(leftonly_idx))) + similar_nullable(df2w, length(leftonly_idx))) return hcat!(left, right) elseif kind == :right df1w = without(df1, on) left = vcat(df1w[left_idx, :], - nas(df1w, length(rightonly_idx))) + similar_nullable(df1w, length(rightonly_idx))) right = df2[[right_idx; rightonly_idx], :] return hcat!(left, right) @@ -232,8 +313,8 @@ function Base.join(df1::AbstractDataFrame, mixed = hcat!(df1[left_idx, :], df2w[right_idx, :]) leftonly = hcat!(df1[leftonly_idx, :], - nas(df2w, length(leftonly_idx))) - rightonly = hcat!(nas(df1w, length(rightonly_idx)), + similar_nullable(df2w, length(leftonly_idx))) + rightonly = hcat!(similar_nullable(df1w, length(rightonly_idx)), 
df2[rightonly_idx, :]) return vcat(mixed, leftonly, rightonly) diff --git a/src/abstractdataframe/reshape.jl b/src/abstractdataframe/reshape.jl index 47f2e33b90..75c27bfaa0 100644 --- a/src/abstractdataframe/reshape.jl +++ b/src/abstractdataframe/reshape.jl @@ -78,25 +78,24 @@ function stack(df::AbstractDataFrame, measure_vars::Vector{Int}, id_vars::Vector [Compat.repeat(df[c], outer=N) for c in id_vars]...], # id_var columns cnames) end -function stack(df::AbstractDataFrame, measure_vars::Int, id_vars::Int) - stack(df, [measure_vars], [id_vars]) +function stack(df::AbstractDataFrame, measure_var::Int, id_var::Int) + stack(df, [measure_var], [id_var]) end -function stack(df::AbstractDataFrame, measure_vars::Vector{Int}, id_vars::Int) - stack(df, measure_vars, [id_vars]) +function stack(df::AbstractDataFrame, measure_vars::Vector{Int}, id_var::Int) + stack(df, measure_vars, [id_var]) end -function stack(df::AbstractDataFrame, measure_vars::Int, id_vars::Vector{Int}) - stackdf(df, [measure_vars], id_vars) +function stack(df::AbstractDataFrame, measure_var::Int, id_vars::Vector{Int}) + stackdf(df, [measure_var], id_vars) end stack(df::AbstractDataFrame, measure_vars, id_vars) = stack(df, index(df)[measure_vars], index(df)[id_vars]) -function stack(df::AbstractDataFrame, measure_vars) +# no vars specified, by default select only numeric columns +numeric_vars(df::AbstractDataFrame) = [T <: AbstractFloat || (T <: Nullable && eltype(T) <: AbstractFloat) + for T in eltypes(df)] +function stack(df::AbstractDataFrame, measure_vars = numeric_vars(df)) mv_inds = index(df)[measure_vars] stack(df, mv_inds, _setdiff(1:ncol(df), mv_inds)) end -function stack(df::AbstractDataFrame) - idx = [1:length(df);][[t <: AbstractFloat for t in eltypes(df)]] - stack(df, idx) -end """ Stacks a DataFrame; convert from a wide to long format; see @@ -163,27 +162,30 @@ function unstack(df::AbstractDataFrame, rowkey::Int, colkey::Int, value::Int) # `rowkey` integer indicating which column to place 
along rows # `colkey` integer indicating which column to place along column headers # `value` integer indicating which column has values - refkeycol = PooledDataArray(df[rowkey]) + refkeycol = NullableCategoricalArray(df[rowkey]) valuecol = df[value] - # TODO make a version with a default refkeycol - keycol = PooledDataArray(df[colkey]) + keycol = NullableCategoricalArray(df[colkey]) Nrow = length(refkeycol.pool) Ncol = length(keycol.pool) - # TODO make fillNA(type, length) - payload = DataFrame(Any[DataArray(eltype(valuecol), Nrow) for i in 1:Ncol], map(Symbol, keycol.pool)) + T = eltype(valuecol) + if T <: Nullable + T = eltype(T) + end + payload = DataFrame(Any[NullableArray(T, Nrow) for i in 1:Ncol], + map(Symbol, levels(keycol))) nowarning = true for k in 1:nrow(df) - j = @compat Int(keycol.refs[k]) - i = @compat Int(refkeycol.refs[k]) + j = Int(CategoricalArrays.order(keycol.pool)[keycol.refs[k]]) + i = Int(CategoricalArrays.order(refkeycol.pool)[refkeycol.refs[k]]) if i > 0 && j > 0 - if nowarning && !isna(payload[j][i]) + if nowarning && !isnull(payload[j][i]) warn("Duplicate entries in unstack.") nowarning = false end payload[j][i] = valuecol[k] end end - insert!(payload, 1, refkeycol.pool, _names(df)[rowkey]) + insert!(payload, 1, NullableArray(levels(refkeycol)), _names(df)[rowkey]) end unstack(df::AbstractDataFrame, rowkey, colkey, value) = unstack(df, index(df)[rowkey], index(df)[colkey], index(df)[value]) @@ -196,24 +198,28 @@ function unstack(df::AbstractDataFrame, colkey::Int, value::Int) # group on anything not a key or value: g = groupby(df, setdiff(_names(df), _names(df)[[colkey, value]])) groupidxs = [g.idx[g.starts[i]:g.ends[i]] for i in 1:length(g.starts)] - rowkey = PooledDataArray(zeros(Int, size(df, 1)), [1:length(groupidxs);]) + rowkey = zeros(Int, size(df, 1)) for i in 1:length(groupidxs) rowkey[groupidxs[i]] = i end - keycol = PooledDataArray(df[colkey]) + keycol = NullableCategoricalArray(df[colkey]) valuecol = df[value] df1 = 
df[g.idx[g.starts], g.cols] - keys = unique(keycol) Nrow = length(g) - Ncol = length(keycol.pool) - df2 = DataFrame(Any[DataArray(fill(valuecol[1], Nrow), fill(true, Nrow)) for i in 1:Ncol], map(@compat(Symbol), keycol.pool)) + Ncol = length(levels(keycol)) + T = eltype(valuecol) + if T <: Nullable + T = eltype(T) + end + df2 = DataFrame(Any[NullableArray(T, Nrow) for i in 1:Ncol], + map(@compat(Symbol), levels(keycol))) nowarning = true for k in 1:nrow(df) - j = @compat Int(keycol.refs[k]) + j = Int(CategoricalArrays.order(keycol.pool)[keycol.refs[k]]) i = rowkey[k] if i > 0 && j > 0 - if nowarning && !isna(df2[j][i]) - warn("Duplicate entries in unstack.") + if nowarning && !isnull(df2[j][i]) + warn("Duplicate entries in unstack at row $k.") nowarning = false end df2[j][i] = valuecol[k] @@ -243,7 +249,7 @@ NOTE: Not exported. ### Constructor ```julia -RepeatedVector(d::AbstractVector...) +StackedVector(d::AbstractVector...) ``` ### Arguments @@ -289,7 +295,7 @@ Base.ndims(v::StackedVector) = 1 Base.eltype(v::StackedVector) = promote_type(map(eltype, v.components)...) 
Base.similar(v::StackedVector, T, dims::Dims) = similar(v.components[1], T, dims) -DataArrays.PooledDataArray(v::StackedVector) = PooledDataArray(v[:]) # could be more efficient +CategoricalArrays.CategoricalArray(v::StackedVector) = CategoricalArray(v[:]) # could be more efficient """ @@ -349,8 +355,8 @@ Base.reverse(v::RepeatedVector) = RepeatedVector(reverse(v.parent), v.inner, v.o Base.similar(v::RepeatedVector, T, dims::Dims) = similar(v.parent, T, dims) Base.unique(v::RepeatedVector) = unique(v.parent) -function DataArrays.PooledDataArray(v::RepeatedVector) - res = DataArrays.PooledDataArray(v.parent) +function CategoricalArrays.CategoricalArray(v::RepeatedVector) + res = CategoricalArrays.CategoricalArray(v.parent) res.refs = repeat(res.refs, inner = [v.inner], outer = [v.outer]) res end @@ -424,26 +430,22 @@ function stackdf(df::AbstractDataFrame, measure_vars::Vector{Int}, id_vars::Vect [RepeatedVector(df[:,c], 1, N) for c in id_vars]...], # id_var columns cnames) end -function stackdf(df::AbstractDataFrame, measure_vars::Int, id_vars::Int) - stackdf(df, [measure_vars], [id_vars]) +function stackdf(df::AbstractDataFrame, measure_var::Int, id_var::Int) + stackdf(df, [measure_var], [id_var]) end -function stackdf(df::AbstractDataFrame, measure_vars, id_vars::Int) - stackdf(df, measure_vars, [id_vars]) +function stackdf(df::AbstractDataFrame, measure_vars, id_var::Int) + stackdf(df, measure_vars, [id_var]) end -function stackdf(df::AbstractDataFrame, measure_vars::Int, id_vars) - stackdf(df, [measure_vars], id_vars) +function stackdf(df::AbstractDataFrame, measure_var::Int, id_vars) + stackdf(df, [measure_var], id_vars) end function stackdf(df::AbstractDataFrame, measure_vars, id_vars) stackdf(df, index(df)[measure_vars], index(df)[id_vars]) end -function stackdf(df::AbstractDataFrame, measure_vars) +function stackdf(df::AbstractDataFrame, measure_vars = numeric_vars(df)) m_inds = index(df)[measure_vars] stackdf(df, m_inds, _setdiff(1:ncol(df), m_inds)) end 
-function stackdf(df::AbstractDataFrame) - idx = [1:length(df);][[t <: AbstractFloat for t in eltypes(df)]] - stackdf(df, idx) -end """ A stacked view of a DataFrame (long format); see `stackdf` diff --git a/src/abstractdataframe/show.jl b/src/abstractdataframe/show.jl index 4effdf1965..b981623b85 100644 --- a/src/abstractdataframe/show.jl +++ b/src/abstractdataframe/show.jl @@ -62,8 +62,9 @@ end #' ourshowcompact(STDOUT, "abc") #' ourshowcompact(STDOUT, 10000) ourshowcompact(io::IO, x::Any) = showcompact(io, x) # -> Void -ourshowcompact(io::IO, x::AbstractString) = showcompact(io, x) # -> Void +ourshowcompact(io::IO, x::AbstractString) = print(io, x) # -> Void ourshowcompact(io::IO, x::Symbol) = print(io, x) # -> Void +ourshowcompact(io::IO, x::Nullable{String}) = isnull(x) ? showcompact(io, x) : print(io, get(x)) # -> Void #' @description #' @@ -100,8 +101,6 @@ function getmaxwidths(df::AbstractDataFrame, rowlabel::Symbol) # -> Vector{Int} maxwidths = Array(Int, size(df, 2) + 1) - # TODO: Move this definition somewhere else - NAstrwidth = 2 undefstrwidth = ourstrwidth(Base.undef_ref_str) j = 1 @@ -110,17 +109,11 @@ function getmaxwidths(df::AbstractDataFrame, maxwidth = ourstrwidth(name) # (2) Consider length of longest entry in that column - for indices in (rowindices1, rowindices2) - for i in indices - if isna(col, i) - maxwidth = max(maxwidth, NAstrwidth) - else - try - maxwidth = max(maxwidth, ourstrwidth(col[i])) - catch - maxwidth = max(maxwidth, undefstrwidth) - end - end + for indices in (rowindices1, rowindices2), i in indices + try + maxwidth = max(maxwidth, ourstrwidth(col[i])) + catch + maxwidth = max(maxwidth, undefstrwidth) end end maxwidths[j] = maxwidth diff --git a/src/abstractdataframe/sort.jl b/src/abstractdataframe/sort.jl index c92021f2e8..1d05e2c9da 100644 --- a/src/abstractdataframe/sort.jl +++ b/src/abstractdataframe/sort.jl @@ -308,7 +308,3 @@ end Base.sort(df::AbstractDataFrame, a::Algorithm, o::Ordering) = df[sortperm(df, a, o),:] 
Base.sortperm(df::AbstractDataFrame, a::Algorithm, o::@compat(Union{Perm,DFPerm})) = sort!([1:size(df, 1);], a, o) Base.sortperm(df::AbstractDataFrame, a::Algorithm, o::Ordering) = sortperm(df, a, DFPerm(o,df)) - -# Extras to speed up sorting -Base.sortperm{V}(df::AbstractDataFrame, a::Algorithm, o::FastPerm{Sort.ForwardOrdering,V}) = sortperm(o.vec) -Base.sortperm{V}(df::AbstractDataFrame, a::Algorithm, o::FastPerm{Sort.ReverseOrdering,V}) = reverse(sortperm(o.vec)) diff --git a/src/dataframe/dataframe.jl b/src/dataframe/dataframe.jl index 3c33fc1505..4ce59b324f 100644 --- a/src/dataframe/dataframe.jl +++ b/src/dataframe/dataframe.jl @@ -2,7 +2,7 @@ An AbstractDataFrame that stores a set of named columns The columns are normally AbstractVectors stored in memory, -particularly a Vector, DataVector, or PooledDataVector. +particularly a Vector, NullableVector, or CategoricalVector. **Constructors** @@ -30,9 +30,9 @@ Each column in `columns` should be the same length. **Notes** -Most of the default constructors convert columns to `DataArrays`. The +Most of the default constructors convert columns to `NullableArray`. The base constructor, `DataFrame(columns::Vector{Any}, -names::Vector{Symbol})` does not convert to `DataArrays`. +names::Vector{Symbol})` does not convert to `NullableArray`. A `DataFrame` is a lightweight object. As long as columns are not manipulated, creation of a DataFrame from existing AbstractVectors is @@ -48,12 +48,12 @@ loops. 
```julia df = DataFrame() v = ["x","y","z"][rand(1:3, 10)] -df1 = DataFrame(Any[[1:10], v, rand(10)], [:A, :B, :C]) # columns are Arrays -df2 = DataFrame(A = 1:10, B = v, C = rand(10)) # columns are DataArrays +df1 = DataFrame(Any[collect(1:10), v, rand(10)], [:A, :B, :C]) # columns are Arrays +df2 = DataFrame(A = 1:10, B = v, C = rand(10)) # columns are NullableArrays dump(df1) dump(df2) describe(df2) -head(df1) +DataFrames.head(df1) df1[:A] + df2[:C] df1[1:4, 1:2] df1[[:A,:C]] @@ -102,9 +102,9 @@ function DataFrame(; kwargs...) return result end -function DataFrame(columns::Vector{Any}, - cnames::Vector{Symbol} = gennames(length(columns))) - return DataFrame(columns, Index(cnames)) +function DataFrame(columns::AbstractVector, + cnames::AbstractVector{Symbol} = gennames(length(columns))) + return DataFrame(convert(Vector{Any}, columns), Index(convert(Vector{Symbol}, cnames))) end @@ -112,7 +112,7 @@ end function DataFrame(t::Type, nrows::Integer, ncols::Integer) columns = Array(Any, ncols) for i in 1:ncols - columns[i] = DataArray(t, nrows) + columns[i] = NullableArray(t, nrows) end cnames = gennames(ncols) return DataFrame(columns, Index(cnames)) @@ -123,19 +123,21 @@ function DataFrame(column_eltypes::Vector, cnames::Vector, nrows::Integer) p = length(column_eltypes) columns = Array(Any, p) for j in 1:p - columns[j] = DataArray(column_eltypes[j], nrows) + columns[j] = NullableArray(column_eltypes[j], nrows) end return DataFrame(columns, Index(cnames)) end -# Initialize an empty DataFrame with specific eltypes and names and whether is pooled data array -function DataFrame(column_eltypes::Vector{DataType}, cnames::Vector{Symbol}, ispda::Vector{Bool}, nrows::Integer) +# Initialize an empty DataFrame with specific eltypes and names +# and whether a nominal array should be created +function DataFrame(column_eltypes::Vector{DataType}, cnames::Vector{Symbol}, + nominal::Vector{Bool}, nrows::Integer) p = length(column_eltypes) columns = Array(Any, p) for j in 1:p - if 
ispda[j] - columns[j] = PooledDataArray(column_eltypes[j], nrows) + if nominal[j] + columns[j] = NullableCategoricalArray(column_eltypes[j], nrows) else - columns[j] = DataArray(column_eltypes[j], nrows) + columns[j] = NullableArray(column_eltypes[j], nrows) end end return DataFrame(columns, Index(cnames)) @@ -147,7 +149,7 @@ function DataFrame(column_eltypes::Vector, nrows::Integer) columns = Array(Any, p) cnames = gennames(p) for j in 1:p - columns[j] = DataArray(column_eltypes[j], nrows) + columns[j] = NullableArray(column_eltypes[j], nrows) end return DataFrame(columns, Index(cnames)) end @@ -167,8 +169,7 @@ function DataFrame{D <: Associative}(ds::Vector{D}, ks::Vector) col_eltypes = Type[@compat(Union{}) for _ = 1:length(ks)] for d in ds for (i,k) in enumerate(ks) - # TODO: check for user-defined "NA" values, ala pandas - if haskey(d, k) && !isna(d[k]) + if haskey(d, k) && !_isnull(d[k]) col_eltypes[i] = promote_type(col_eltypes[i], typeof(d[k])) end end @@ -179,7 +180,7 @@ function DataFrame{D <: Associative}(ds::Vector{D}, ks::Vector) df = DataFrame(col_eltypes, ks, length(ds)) for (i,d) in enumerate(ds) for (j,k) in enumerate(ks) - df[i,j] = get(d, k, NA) + df[i,j] = get(d, k, Nullable()) end end @@ -230,7 +231,9 @@ function Base.getindex(df::DataFrame, col_ind::ColumnIndex) end # df[MultiColumnIndex] => (Sub)?DataFrame -function Base.getindex{T <: ColumnIndex}(df::DataFrame, col_inds::AbstractVector{T}) +function Base.getindex{T <: ColumnIndex}(df::DataFrame, + col_inds::Union{AbstractVector{T}, + AbstractVector{Nullable{T}}}) selected_columns = index(df)[col_inds] new_columns = df.columns[selected_columns] return DataFrame(new_columns, Index(_names(df)[selected_columns])) @@ -246,20 +249,29 @@ function Base.getindex(df::DataFrame, row_ind::Real, col_ind::ColumnIndex) end # df[SingleRowIndex, MultiColumnIndex] => (Sub)?DataFrame -function Base.getindex{T <: ColumnIndex}(df::DataFrame, row_ind::Real, col_inds::AbstractVector{T}) +function Base.getindex{T 
<: ColumnIndex}(df::DataFrame, + row_ind::Real, + col_inds::Union{AbstractVector{T}, + AbstractVector{Nullable{T}}}) selected_columns = index(df)[col_inds] new_columns = Any[dv[[row_ind]] for dv in df.columns[selected_columns]] return DataFrame(new_columns, Index(_names(df)[selected_columns])) end # df[MultiRowIndex, SingleColumnIndex] => (Sub)?AbstractDataVector -function Base.getindex{T <: Real}(df::DataFrame, row_inds::AbstractVector{T}, col_ind::ColumnIndex) +function Base.getindex{T <: Real}(df::DataFrame, + row_inds::Union{AbstractVector{T}, AbstractVector{Nullable{T}}}, + col_ind::ColumnIndex) selected_column = index(df)[col_ind] return df.columns[selected_column][row_inds] end # df[MultiRowIndex, MultiColumnIndex] => (Sub)?DataFrame -function Base.getindex{R <: Real, T <: ColumnIndex}(df::DataFrame, row_inds::AbstractVector{R}, col_inds::AbstractVector{T}) +function Base.getindex{R <: Real, T <: ColumnIndex}(df::DataFrame, + row_inds::Union{AbstractVector{R}, + AbstractVector{Nullable{R}}}, + col_inds::Union{AbstractVector{T}, + AbstractVector{Nullable{T}}}) selected_columns = index(df)[col_inds] new_columns = Any[dv[row_inds] for dv in df.columns[selected_columns]] return DataFrame(new_columns, Index(_names(df)[selected_columns])) @@ -267,13 +279,20 @@ end # df[:, SingleColumnIndex] => (Sub)?AbstractVector # df[:, MultiColumnIndex] => (Sub)?DataFrame -Base.getindex{T<:ColumnIndex}(df::DataFrame, row_inds::Colon, col_inds::@compat(Union{T, AbstractVector{T}})) = df[col_inds] +Base.getindex{T<:ColumnIndex}(df::DataFrame, + row_inds::Colon, + col_inds::Union{T, AbstractVector{T}, + AbstractVector{Nullable{T}}}) = + df[col_inds] # df[SingleRowIndex, :] => (Sub)?DataFrame Base.getindex(df::DataFrame, row_ind::Real, col_inds::Colon) = df[[row_ind], col_inds] # df[MultiRowIndex, :] => (Sub)?DataFrame -function Base.getindex{R<:Real}(df::DataFrame, row_inds::AbstractVector{R}, col_inds::Colon) +function Base.getindex{R<:Real}(df::DataFrame, + 
row_inds::Union{AbstractVector{R}, + AbstractVector{Nullable{R}}}, + col_inds::Colon) new_columns = Any[dv[row_inds] for dv in df.columns] return DataFrame(new_columns, copy(index(df))) end @@ -344,17 +363,17 @@ function insert_multiple_entries!{T <: Real}(df::DataFrame, end end -upgrade_vector(v::Vector) = DataArray(v, falses(length(v))) -upgrade_vector(v::Range) = DataArray([v;], falses(length(v))) -upgrade_vector(v::BitVector) = DataArray(convert(Array{Bool}, v), falses(length(v))) -upgrade_vector(adv::AbstractDataArray) = adv +upgrade_vector{T<:Nullable}(v::AbstractArray{T}) = v +upgrade_vector(v::CategoricalArray) = NullableCategoricalArray(v) +upgrade_vector(v::AbstractArray) = NullableArray(v) + function upgrade_scalar(df::DataFrame, v::AbstractArray) msg = "setindex!(::DataFrame, ...) only broadcasts scalars, not arrays" throw(ArgumentError(msg)) end function upgrade_scalar(df::DataFrame, v::Any) n = (ncol(df) == 0) ? 1 : nrow(df) - DataArray(fill(v, n), falses(n)) + NullableArray(fill(v, n)) end # df[SingleColumnIndex] = AbstractVector @@ -365,10 +384,13 @@ function Base.setindex!(df::DataFrame, end # df[SingleColumnIndex] = Single Item (EXPANDS TO NROW(DF) if NCOL(DF) > 0) -function Base.setindex!(df::DataFrame, - v::Any, - col_ind::ColumnIndex) - insert_single_column!(df, upgrade_scalar(df, v), col_ind) +function Base.setindex!(df::DataFrame, v, col_ind::ColumnIndex) + if haskey(index(df), col_ind) + fill!(df[col_ind], v) + else + insert_single_column!(df, upgrade_scalar(df, v), col_ind) + end + return df end # df[MultiColumnIndex] = DataFrame @@ -397,7 +419,7 @@ function Base.setindex!{T <: ColumnIndex}(df::DataFrame, col_inds::AbstractVector{T}) dv = upgrade_vector(v) for col_ind in col_inds - insert_single_column!(df, dv, col_ind) + df[col_ind] = dv end return df end @@ -411,9 +433,8 @@ end function Base.setindex!{T <: ColumnIndex}(df::DataFrame, val::Any, col_inds::AbstractVector{T}) - dv = upgrade_scalar(df, val) for col_ind in col_inds - 
insert_single_column!(df, dv, col_ind) + df[col_ind] = val end return df end @@ -621,8 +642,20 @@ function Base.insert!(df::DataFrame, col_ind::Int, item::AbstractVector, name::S insert!(df.columns, col_ind, item) df end -Base.insert!(df::DataFrame, col_ind::Int, item, name::Symbol) = + +# FIXME: Needed to work around a crash: JuliaLang/julia#18299 +function Base.insert!(df::DataFrame, col_ind::Int, item::NullableArray, name::Symbol) + 0 < col_ind <= ncol(df) + 1 || throw(BoundsError()) + size(df, 1) == length(item) || size(df, 1) == 0 || error("number of rows does not match") + + insert!(index(df), col_ind, name) + insert!(df.columns, col_ind, item) + df +end + +function Base.insert!(df::DataFrame, col_ind::Int, item, name::Symbol) insert!(df, col_ind, upgrade_scalar(df, item), name) +end function Base.merge!(df::DataFrame, others::AbstractDataFrame...) for other in others @@ -721,9 +754,11 @@ function hcat!(df1::DataFrame, df2::AbstractDataFrame) return df1 end -hcat!{T}(df::DataFrame, x::DataVector{T}) = hcat!(df, DataFrame(Any[x])) -hcat!{T}(df::DataFrame, x::Vector{T}) = hcat!(df, DataFrame(Any[DataArray(x)])) -hcat!{T}(df::DataFrame, x::T) = hcat!(df, DataFrame(Any[DataArray([x])])) +hcat!(df::DataFrame, x::CategoricalArray) = hcat!(df, DataFrame(Any[x])) +hcat!(df::DataFrame, x::NullableCategoricalArray) = hcat!(df, DataFrame(Any[x])) +hcat!(df::DataFrame, x::NullableVector) = hcat!(df, DataFrame(Any[x])) +hcat!(df::DataFrame, x::Vector) = hcat!(df, DataFrame(Any[NullableArray(x)])) +hcat!(df::DataFrame, x) = hcat!(df, DataFrame(Any[NullableArray([x])])) # hcat! 
for 1-n arguments hcat!(df::DataFrame) = df @@ -739,7 +774,7 @@ Base.hcat(df::DataFrame, x) = hcat!(copy(df), x) ############################################################################## function nullable!(df::DataFrame, col::ColumnIndex) - df[col] = DataArray(df[col]) + df[col] = NullableArray(df[col]) df end function nullable!{T <: ColumnIndex}(df::DataFrame, cols::Vector{T}) @@ -755,25 +790,23 @@ end ## ############################################################################## -pool(a::AbstractVector) = compact(PooledDataArray(a)) - -function pool!(df::DataFrame, cname::@compat(Union{Integer, Symbol})) - df[cname] = pool(df[cname]) +function categorical!(df::DataFrame, cname::@compat(Union{Integer, Symbol}), compact::Bool=true) + df[cname] = categorical(df[cname], compact) return end -function pool!{T <: @compat(Union{Integer, Symbol})}(df::DataFrame, cnames::Vector{T}) +function categorical!{T <: @compat(Union{Integer, Symbol})}(df::DataFrame, cnames::Vector{T}, + compact::Bool=true) for cname in cnames - df[cname] = pool(df[cname]) + df[cname] = categorical(df[cname], compact) end return end -# TODO: Deprecate or change for being too inconsistent with other pool methods -function pool!(df::DataFrame) +function categorical!(df::DataFrame, compact::Bool=true) for i in 1:size(df, 2) if eltype(df[i]) <: AbstractString - df[i] = pool(df[i]) + df[i] = categorical(df[i], compact) end end return @@ -811,7 +844,7 @@ function _dataframe_from_associative(dnames, d::Associative) if length(col) != n throw(ArgumentError("All columns in Dict must have the same length")) end - columns[j] = DataArray(col) + columns[j] = NullableArray(col) colnames[j] = Symbol(name) end return DataFrame(columns, Index(colnames)) diff --git a/src/dataframe/io.jl b/src/dataframe/io.jl index c7f76baae6..6a86c2466d 100644 --- a/src/dataframe/io.jl +++ b/src/dataframe/io.jl @@ -517,7 +517,7 @@ function builddf(rows::Integer, values = Array(o.eltypes[j], rows) end - missing = falses(rows) + 
missing = fill(false, rows) is_int = true is_float = true is_bool = true @@ -640,9 +640,9 @@ function builddf(rows::Integer, end if o.makefactors && !(is_int || is_float || is_bool) - columns[j] = PooledDataArray(values, missing) + columns[j] = NullableCategoricalArray(values, missing) else - columns[j] = DataArray(values, missing) + columns[j] = NullableArray(values, missing) end end @@ -801,7 +801,7 @@ function readtable(io::IO, separator::Char = ',', quotemark::Vector{Char} = ['"'], decimal::Char = '.', - nastrings::Vector = ["", "NA"], + nastrings::Vector = ["", "NULL", "NA"], truestrings::Vector = ["T", "t", "TRUE", "true"], falsestrings::Vector = ["F", "f", "FALSE", "false"], makefactors::Bool = false, @@ -874,10 +874,10 @@ readtable(filename, [keyword options]) * `separator::Char` -- Assume that fields are split by the `separator` character. If not specified, it will be guessed from the filename: `.csv` defaults to `','`, `.tsv` defaults to `'\t'`, `.wsv` defaults to `' '`. * `quotemark::Vector{Char}` -- Assume that fields contained inside of two `quotemark` characters are quoted, which disables processing of separators and linebreaks. Set to `Char[]` to disable this feature and slightly improve performance. Defaults to `['"']`. * `decimal::Char` -- Assume that the decimal place in numbers is written using the `decimal` character. Defaults to `'.'`. -* `nastrings::Vector{String}` -- Translate any of the strings into this vector into an `NA`. Defaults to `["", "NA"]`. +* `nastrings::Vector{String}` -- Translate any of the strings into this vector into a NULL value. Defaults to `["", "NULL", "NA"]`. * `truestrings::Vector{String}` -- Translate any of the strings into this vector into a Boolean `true`. Defaults to `["T", "t", "TRUE", "true"]`. * `falsestrings::Vector{String}` -- Translate any of the strings into this vector into a Boolean `false`. Defaults to `["F", "f", "FALSE", "false"]`. 
-* `makefactors::Bool` -- Convert string columns into `PooledDataVector`'s for use as factors. Defaults to `false`. +* `makefactors::Bool` -- Convert string columns into `CategoricalVector`'s for use as factors. Defaults to `false`. * `nrows::Int` -- Read only `nrows` from the file. Defaults to `-1`, which indicates that the entire file should be read. * `names::Vector{Symbol}` -- Use the values in this array as the names for all columns instead of or in lieu of the names in the file's header. Defaults to `[]`, which indicates that the header should be used if present or that numeric names should be invented if there is no header. * `eltypes::Vector` -- Specify the types of all columns. Defaults to `[]`. @@ -909,7 +909,7 @@ function readtable(pathname::AbstractString; separator::Char = getseparator(pathname), quotemark::Vector{Char} = ['"'], decimal::Char = '.', - nastrings::Vector = String["", "NA"], + nastrings::Vector = String["", "NULL", "NA"], truestrings::Vector = String["T", "t", "TRUE", "true"], falsestrings::Vector = String["F", "f", "FALSE", "false"], makefactors::Bool = false, @@ -975,7 +975,7 @@ literals. Parses the string `s` containing delimiter-separated tabular data argument contains a list of flag characters, which, if present, are equivalent to supplying named arguments to `readtable` as follows: -- `f`: `makefactors=true`, convert string columns to `PooledData` columns +- `f`: `makefactors=true`, convert string columns to `CategoricalArray` columns - `c`: `allowcomments=true`, ignore lines beginning with `#` - `H`: `header=false`, do not interpret the first line as column names """ @@ -1004,7 +1004,7 @@ separated values (CSV) using `readtable`, just as if it were being loaded from an external file. The suffix flags `f`, `c`, and `H` are optional. 
If present, they are equivalent to supplying named arguments to `readtable` as follows: -* `f`: `makefactors=true`, convert string columns to `PooledDataArray` columns +* `f`: `makefactors=true`, convert string columns to `CategoricalArray` columns * `c`: `allowcomments=true`, ignore lines beginning with `#` * `H`: `header=false`, do not interpret the first line as column names @@ -1038,7 +1038,7 @@ character, just as if it were being loaded from an external file. The suffix flags `f`, `c`, and `H` are optional. If present, they are equivalent to supplying named arguments to `readtable` as follows: -* `f`: `makefactors=true`, convert string columns to `PooledDataArray` columns +* `f`: `makefactors=true`, convert string columns to `CategoricalArray` columns * `c`: `allowcomments=true`, ignore lines beginning with `#` * `H`: `header=false`, do not interpret the first line as column names @@ -1074,7 +1074,7 @@ loaded from an external file. The suffix flags `f`, `c`, and `H` are optional. If present, they are equivalent to supplying named arguments to `readtable` as follows: -* `f`: `makefactors=true`, convert string columns to `PooledDataArray` columns +* `f`: `makefactors=true`, convert string columns to `CategoricalArray` columns * `c`: `allowcomments=true`, ignore lines beginning with `#` * `H`: `header=false`, do not interpret the first line as column names @@ -1107,7 +1107,7 @@ separated values (TSV) using `readtable`, just as if it were being loaded from an external file. The suffix flags `f`, `c`, and `H` are optional. 
If present, they are equivalent to supplying named arguments to `readtable` as follows: -* `f`: `makefactors=true`, convert string columns to `PooledDataArray` columns +* `f`: `makefactors=true`, convert string columns to `CategoricalArray` columns * `c`: `allowcomments=true`, ignore lines beginning with `#` * `H`: `header=false`, do not interpret the first line as column names diff --git a/src/dataframe/sort.jl b/src/dataframe/sort.jl index 9255557469..e6cd01a88d 100644 --- a/src/dataframe/sort.jl +++ b/src/dataframe/sort.jl @@ -12,7 +12,14 @@ end function Base.sort!(df::DataFrame, a::Base.Sort.Algorithm, o::Base.Sort.Ordering) p = sortperm(df, a, o) pp = similar(p) - for col in columns(df) + c = columns(df) + + for (i,col) in enumerate(c) + # Check if this column has been sorted already + if any(j -> c[j]===col, 1:i-1) + continue + end + copy!(pp,p) Base.permute!!(col, pp) end diff --git a/src/dataframerow/dataframerow.jl b/src/dataframerow/dataframerow.jl index daf7db86d7..1614e317b6 100644 --- a/src/dataframerow/dataframerow.jl +++ b/src/dataframerow/dataframerow.jl @@ -41,7 +41,7 @@ Base.convert(::Type{Array}, r::DataFrameRow) = convert(Array, r.df[r.row,:]) # so that duplicate rows would have the same hash function Base.hash(r::DataFrameRow, h::UInt) for col in columns(r.df) - if isna(col, r.row) + if _isnull(col[r.row]) h = hash(false, h) else h = hash(true, hash(col[r.row], h)) @@ -50,34 +50,16 @@ function Base.hash(r::DataFrameRow, h::UInt) return h end -# compare two elements in the array -_isequalelms(a::Array, i::Int, j::Int) = isequal(a[i], a[j]) - -# compare the two elements in the data array -function _isequalelms(a::DataArray, i::Int, j::Int) - if isna(a, i) - return isna(a, j) - else - return !isna(a, j) && isequal(a.data[i], a.data[j]) - end -end - -# compare two elements in the pooled array -# NOTE assume there are no duplicated elements in the pool -_isequalelms(a::PooledDataArray, i::Int, j::Int) = isequal(a.refs[i], a.refs[j]) - # comparison 
of DataFrame rows # only the rows of the same DataFrame could be compared # rows are equal if they have the same values (while the row indices could differ) +@compat(Base.:(==))(r1::DataFrameRow, r2::DataFrameRow) = isequal(r1, r2) + function Base.isequal(r1::DataFrameRow, r2::DataFrameRow) - if r1.df !== r2.df - throw(ArgumentError("Comparing rows from different frames not supported")) - end - if r1.row == r2.row - return true - end + r1.df == r2.df || throw(ArgumentError("Comparing rows from different frames not supported")) + r1.row == r2.row && return true for col in columns(r1.df) - if !_isequalelms(col, r1.row, r2.row) + if !isequal(col[r1.row], col[r2.row]) return false end end diff --git a/src/deprecated.jl b/src/deprecated.jl index 0c7e43c31d..286aa61b93 100644 --- a/src/deprecated.jl +++ b/src/deprecated.jl @@ -4,17 +4,13 @@ import Base: @deprecate @deprecate by(d::AbstractDataFrame, cols, s::Symbol) aggregate(d, cols, eval(s)) @deprecate nullable!(colnames::Array{Symbol,1}, df::AbstractDataFrame) nullable!(df, colnames) @deprecate nullable!(colnums::Array{Int,1}, df::AbstractDataFrame) nullable!(df, colnums) + import Base: keys, values, insert! @deprecate keys(df::AbstractDataFrame) names(df) @deprecate values(df::AbstractDataFrame) DataFrames.columns(df) @deprecate insert!(df::DataFrame, df2::AbstractDataFrame) merge!(df, df2) -import DataArrays: array, DataArray -@deprecate array(df::AbstractDataFrame) convert(Array, df) -@deprecate array(r::DataFrameRow) convert(Array, r) -if VERSION < v"0.4.0-" - @deprecate DataArray(df::AbstractDataFrame) convert(DataArray, df) -end -@deprecate DataArray(df::AbstractDataFrame, T::DataType) convert(DataArray{T}, df) - @deprecate read_rda(args...) FileIO.load(args...) + +@deprecate pool categorical +@deprecate pool! categorical! 
diff --git a/src/groupeddataframe/grouping.jl b/src/groupeddataframe/grouping.jl index 50dbc288fc..9caca98f24 100644 --- a/src/groupeddataframe/grouping.jl +++ b/src/groupeddataframe/grouping.jl @@ -25,6 +25,41 @@ end # # Split # + +function groupsort_indexer(x::AbstractVector, ngroups::Integer, null_last::Bool=false) + # translated from Wes McKinney's groupsort_indexer in pandas (file: src/groupby.pyx). + + # count group sizes, location 0 for NULL + n = length(x) + # counts = x.pool + counts = fill(0, ngroups + 1) + for i = 1:n + counts[x[i] + 1] += 1 + end + + # mark the start of each contiguous group of like-indexed data + where = fill(1, ngroups + 1) + if null_last + for i = 3:ngroups+1 + where[i] = where[i - 1] + counts[i - 1] + end + where[1] = where[end] + counts[end] + else + for i = 2:ngroups+1 + where[i] = where[i - 1] + counts[i - 1] + end + end + + # this is our indexer + result = fill(0, n) + for i = 1:n + label = x[i] + 1 + result[where[label]] = i + where[label] += 1 + end + result, where, counts +end + """ A view of an AbstractDataFrame split into row groups @@ -35,14 +70,13 @@ groupby(cols) ### Arguments -* `d` : an AbstractDataFrame -* `cols` : an - -If `d` is not provided, a curried version of groupby is given. +* `d` : an AbstractDataFrame to split (optional, see [Returns](#returns)) +* `cols` : data frame columns to group by ### Returns * `::GroupedDataFrame` : a grouped view into `d` +* `::Function`: a function `x -> groupby(x, cols)` (if `d` is not specified) ### Details @@ -76,8 +110,8 @@ vcat([g[:b] for g in gd]...) 
for g in gd println(g) end -map(d -> mean(d[:c]), gd) # returns a GroupApplied object -combine(map(d -> mean(d[:c]), gd)) +map(d -> mean(dropnull(d[:c])), gd) # returns a GroupApplied object +combine(map(d -> mean(dropnull(d[:c])), gd)) df |> groupby(:a) |> [sum, length] df |> groupby([:a, :b]) |> [sum, length] ``` @@ -88,25 +122,34 @@ function groupby{T}(d::AbstractDataFrame, cols::Vector{T}) ## http://wesmckinney.com/blog/?p=489 ncols = length(cols) - # use the pool trick to get a set of integer references for each unique item - dv = PooledDataArray(d[cols[ncols]]) - # if there are NAs, add 1 to the refs to avoid underflows in x later - dv_has_nas = (findfirst(dv.refs, 0) > 0 ? 1 : 0) - # use UInt32 instead of the PDA's integer size since the number of levels can be high - x = copy!(similar(dv.refs, UInt32), dv.refs) .+ dv_has_nas + # use CategoricalArray to get a set of integer references for each unique item + nv = NullableCategoricalArray(d[cols[ncols]]) + # if there are NULLs, add 1 to the refs to avoid underflows in x later + anynulls = (findfirst(nv.refs, 0) > 0 ? 1 : 0) + # use UInt32 instead of the original array's integer size since the number of levels can be high + x = similar(nv.refs, UInt32) + for i = 1:nrow(d) + if nv.refs[i] == 0 + x[i] = 1 + else + x[i] = CategoricalArrays.order(nv.pool)[nv.refs[i]] + anynulls + end + end # also compute the number of groups, which is the product of the set lengths - ngroups = length(dv.pool) + dv_has_nas + ngroups = length(levels(nv)) + anynulls # if there's more than 1 column, do roughly the same thing repeatedly for j = (ncols - 1):-1:1 - dv = PooledDataArray(d[cols[j]]) - dv_has_nas = (findfirst(dv.refs, 0) > 0 ? 1 : 0) + nv = NullableCategoricalArray(d[cols[j]]) + anynulls = (findfirst(nv.refs, 0) > 0 ? 
1 : 0) for i = 1:nrow(d) - x[i] += (dv.refs[i] + dv_has_nas- 1) * ngroups + if nv.refs[i] != 0 + x[i] += (CategoricalArrays.order(nv.pool)[nv.refs[i]] + anynulls - 1) * ngroups + end end - ngroups = ngroups * (length(dv.pool) + dv_has_nas) + ngroups = ngroups * (length(levels(nv)) + anynulls) # TODO if ngroups is really big, shrink it end - (idx, starts) = DataArrays.groupsort_indexer(x, ngroups) + (idx, starts) = groupsort_indexer(x, ngroups) # Remove zero-length groupings starts = _uniqueofsorted(starts) ends = starts[2:end] - 1 @@ -159,15 +202,14 @@ Not meant to be constructed directly, see `groupby` abnd provided for a GroupApplied object. """ -type GroupApplied +immutable GroupApplied{T<:AbstractDataFrame} gd::GroupedDataFrame - vals::Vector + vals::Vector{T} - function GroupApplied(gd, vals) - if length(gd) != length(vals) - error("GroupApplied requires keys and vals be of equal length.") - end - new(gd, vals) + @compat function (::Type{GroupApplied})(gd::GroupedDataFrame, vals::Vector) + length(gd) == length(vals) || + throw(DimensionMismatch("GroupApplied requires keys and vals be of equal length (got $(length(gd)) and $(length(vals))).")) + new{eltype(vals)}(gd, vals) end end @@ -178,10 +220,10 @@ end # map() sweeps along groups function Base.map(f::Function, gd::GroupedDataFrame) - GroupApplied(gd, AbstractDataFrame[wrap(f(d)) for d in gd]) + GroupApplied(gd, [wrap(f(df)) for df in gd]) end function Base.map(f::Function, ga::GroupApplied) - GroupApplied(ga.gd, AbstractDataFrame[wrap(f(d)) for d in ga.vals]) + GroupApplied(ga.gd, [wrap(f(df)) for df in ga.vals]) end wrap(df::AbstractDataFrame) = df @@ -209,23 +251,21 @@ combine(ga::GroupApplied) df = DataFrame(a = repeat([1, 2, 3, 4], outer=[2]), b = repeat([2, 1], outer=[4]), c = randn(8)) -combine(map(d -> mean(d[:c]), gd)) +combine(map(d -> mean(dropnull(d[:c])), gd)) ``` """ function combine(ga::GroupApplied) gd, vals = ga.gd, ga.vals - # Could be made shorter with a rep(x, lengths) function - # See 
JuliaLang/julia#16443 - idx = Vector{Int}(sum(Int[size(val, 1) for val in vals])) + valscat = vcat(vals) + idx = Vector{Int}(size(valscat, 1)) j = 0 - for i in 1:length(vals) - n = size(vals[i], 1) - @inbounds idx[j + (1:n)] = gd.idx[gd.starts[i]] + @inbounds for (start, val) in zip(gd.starts, vals) + n = size(val, 1) + idx[j + (1:n)] = gd.idx[start] j += n end - ret = gd.parent[idx, gd.cols] - hcat!(ret, vcat(vals)) + hcat!(gd.parent[idx, gd.cols], valscat) end @@ -260,12 +300,14 @@ colwise(sum, groupby(df, :a)) ``` """ -colwise(f::Function, d::AbstractDataFrame) = Any[[f(d[idx])] for idx in 1:size(d, 2)] +colwise(f::Function, d::AbstractDataFrame) = Any[vcat(f(d[idx])) for idx in 1:size(d, 2)] colwise(f::Function, gd::GroupedDataFrame) = map(colwise(f), gd) colwise(f::Function) = x -> colwise(f, x) colwise(f) = x -> colwise(f, x) # apply several functions to each column in a DataFrame -colwise{T<:Function}(fns::Vector{T}, d::AbstractDataFrame) = Any[[f(d[idx])] for f in fns, idx in 1:size(d, 2)][:] +colwise{T<:Function}(fns::Vector{T}, d::AbstractDataFrame) = + reshape(Any[vcat(f(d[idx])) for f in fns, idx in 1:size(d, 2)], + length(fns)*size(d, 2)) colwise{T<:Function}(fns::Vector{T}, gd::GroupedDataFrame) = map(colwise(fns), gd) colwise{T<:Function}(fns::Vector{T}) = x -> colwise(fns, x) @@ -299,7 +341,7 @@ notation can be used. 
### Returns -* `::DataFrame` +* `::DataFrame` ### Examples @@ -308,11 +350,11 @@ df = DataFrame(a = repeat([1, 2, 3, 4], outer=[2]), b = repeat([2, 1], outer=[4]), c = randn(8)) by(df, :a, d -> sum(d[:c])) -by(df, :a, d -> 2 * d[:c]) -by(df, :a, d -> DataFrame(c_sum = sum(d[:c]), c_mean = mean(d[:c]))) -by(df, :a, d -> DataFrame(c = d[:c], c_mean = mean(d[:c]))) +by(df, :a, d -> 2 * dropnull(d[:c])) +by(df, :a, d -> DataFrame(c_sum = sum(d[:c]), c_mean = mean(dropnull(d[:c])))) +by(df, :a, d -> DataFrame(c = d[:c], c_mean = mean(dropnull(d[:c])))) by(df, [:a, :b]) do d - DataFrame(m = mean(d[:c]), v = var(d[:c])) + DataFrame(m = mean(dropnull(d[:c])), v = var(dropnull(d[:c]))) end ``` @@ -347,7 +389,7 @@ same length. ### Returns -* `::DataFrame` +* `::DataFrame` ### Examples @@ -356,9 +398,9 @@ df = DataFrame(a = repeat([1, 2, 3, 4], outer=[2]), b = repeat([2, 1], outer=[4]), c = randn(8)) aggregate(df, :a, sum) -aggregate(df, :a, [sum, mean]) -aggregate(groupby(df, :a), [sum, mean]) -df |> groupby(:a) |> [sum, mean] # equivalent +aggregate(df, :a, [sum, x->mean(dropnull(x))]) +aggregate(groupby(df, :a), [sum, x->mean(dropnull(x))]) +df |> groupby(:a) |> [sum, x->mean(dropnull(x))] # equivalent ``` """ @@ -369,7 +411,7 @@ function aggregate{T<:Function}(d::AbstractDataFrame, fs::Vector{T}) end # Applies aggregate to non-key cols of each SubDataFrame of a GroupedDataFrame -aggregate(gd::GroupedDataFrame, fs::Function) = aggregate(gd, [fs]) +aggregate(gd::GroupedDataFrame, f::Function) = aggregate(gd, [f]) function aggregate{T<:Function}(gd::GroupedDataFrame, fs::Vector{T}) headers = _makeheaders(fs, _setdiff(_names(gd), gd.cols)) combine(map(x -> _aggregate(without(x, gd.cols), fs, headers), gd)) @@ -386,8 +428,8 @@ end function _makeheaders{T<:Function}(fs::Vector{T}, cn::Vector{Symbol}) fnames = _fnames(fs) # see other/utils.jl - scn = [string(x) for x in cn] - [Symbol("$(colname)_$(fname)") for fname in fnames, colname in scn][:] + 
reshape([Symbol(colname,'_',fname) for fname in fnames, colname in cn], + length(fnames)*length(cn)) end function _aggregate{T<:Function}(d::AbstractDataFrame, fs::Vector{T}, headers::Vector{Symbol}) diff --git a/src/other/index.jl b/src/other/index.jl index 4ba61fd94d..ef50b3b710 100644 --- a/src/other/index.jl +++ b/src/other/index.jl @@ -113,8 +113,10 @@ end Base.getindex(x::Index, idx::Symbol) = x.lookup[idx] Base.getindex(x::AbstractIndex, idx::Real) = @compat Int(idx) -Base.getindex(x::AbstractIndex, idx::AbstractDataVector{Bool}) = getindex(x, convert(Array, idx, false)) -Base.getindex{T}(x::AbstractIndex, idx::AbstractDataVector{T}) = getindex(x, dropna(idx)) +Base.getindex(x::AbstractIndex, idx::AbstractVector{Nullable{Bool}}) = + getindex(x, convert(Vector{Bool}, idx, false)) +Base.getindex{T<:Nullable}(x::AbstractIndex, idx::AbstractVector{T}) = + getindex(x, dropnull(idx)) Base.getindex(x::AbstractIndex, idx::AbstractVector{Bool}) = find(idx) Base.getindex(x::AbstractIndex, idx::Range) = [idx;] Base.getindex{T <: Real}(x::AbstractIndex, idx::AbstractVector{T}) = convert(Vector{Int}, idx) diff --git a/src/other/utils.jl b/src/other/utils.jl index 7227f7027d..a0ceef879f 100644 --- a/src/other/utils.jl +++ b/src/other/utils.jl @@ -54,7 +54,7 @@ function make_unique(names::Vector{Symbol}; allow_duplicates=true) name = names[i] in(name, seen) ? push!(dups, i) : push!(seen, name) end - + if !allow_duplicates && length(dups) > 0 d = unique(names[dups]) msg = """Duplicate variable names: $d. @@ -99,50 +99,55 @@ function gennames(n::Integer) return res end + #' @description #' -#' Count the number of missing values in an Array. +#' Count the number of null values in an array. #' -#' NOTE: This function always returns 0. +#' @field a::AbstractArray The array whose missing values are to be counted. #' -#' @field a::Array The Array whose missing values are to be counted. -#' -#' @returns count::Int The number of missing values in `a`. 
+#' @returns count::Int The number of null values in `a`. #' #' @examples #' -#' DataFrames.countna([1, 2, 3]) -countna(a::Array) = 0 +#' DataFrames.countnull([1, 2, 3]) +function countnull(a::AbstractArray) + res = 0 + for x in a + res += _isnull(x) + end + return res +end #' @description #' -#' Count the number of missing values in a DataArray. +#' Count the number of missing values in a NullableArray. #' -#' @field da::DataArray The DataArray whose missing values are to be counted. +#' @field a::NullableArray The NullableArray whose missing values are to be counted. #' -#' @returns count::Int The number of missing values in `a`. +#' @returns count::Int The number of null values in `a`. #' #' @examples #' -#' DataFrames.countna(@data([1, 2, 3])) -countna(da::DataArray) = sum(da.na) +#' DataFrames.countnull(NullableArray([1, 2, 3])) +countnull(a::NullableArray) = sum(a.isnull) #' @description #' -#' Count the number of missing values in a PooledDataArray. +#' Count the number of missing values in a NullableCategoricalArray. #' -#' @field pda::PooledDataArray The PooledDataArray whose missing values +#' @field na::CategoricalArray The CategoricalArray whose missing values #' are to be counted. #' -#' @returns count::Int The number of missing values in `a`. +#' @returns count::Int The number of null values in `a`. 
#' #' @examples #' -#' DataFrames.countna(@pdata([1, 2, 3])) -function countna(da::PooledDataArray) +#' DataFrames.countnull(CategoricalArray([1, 2, 3])) +function countnull(a::CategoricalArray) res = 0 - for i in 1:length(da) - res += da.refs[i] == 0 + for x in a.refs + res += x == 0 end return res end @@ -193,3 +198,6 @@ function _fnames{T<:Function}(fs::Vector{T}) end names end + +_isnull(x::Any) = false +_isnull(x::Nullable) = isnull(x) diff --git a/src/statsmodels/contrasts.jl b/src/statsmodels/contrasts.jl index 095ea5da0d..47e7b97434 100644 --- a/src/statsmodels/contrasts.jl +++ b/src/statsmodels/contrasts.jl @@ -140,19 +140,21 @@ end # Methods for constructing ContrastsMatrix from data. These are called in # ModelFrame constructor and setcontrasts!. -# TODO: add methods for new categorical types - -ContrastsMatrix(C::AbstractContrasts, v::PooledDataArray) = +ContrastsMatrix(C::AbstractContrasts, + v::Union{CategoricalArray, NullableCategoricalArray}) = ContrastsMatrix(C, levels(v)) -ContrastsMatrix{C <: AbstractContrasts}(c::Type{C}, col::PooledDataArray) = +ContrastsMatrix{C <: AbstractContrasts}(c::Type{C}, + col::Union{CategoricalArray, NullableCategoricalArray}) = throw(ArgumentError("contrast types must be instantiated (use $c() instead of $c)")) + # given an existing ContrastsMatrix, check that all of the levels present in the # data are present in the contrasts. Note that this behavior is different from the # ContrastsMatrix constructor, which requires that the levels be exactly the same. # This method exists to support things like `predict` that can operate on new data # which may contain only a subset of the original data's levels. Checking here # (instead of in `modelmat_cols`) allows an informative error message. 
-function ContrastsMatrix(c::ContrastsMatrix, col::PooledDataArray) +function ContrastsMatrix(c::ContrastsMatrix, + col::Union{CategoricalArray, NullableCategoricalArray}) if !isempty(setdiff(levels(col), c.levels)) throw(ArgumentError("there are levels in data that are not in ContrastsMatrix: " * "$(setdiff(levels(col), c.levels))" * @@ -171,7 +173,8 @@ nullify(x::Nullable) = x nullify(x) = Nullable(x) # Making a contrast type T only requires that there be a method for -# contrasts_matrix(T, v::PooledDataArray). The rest is boilerplate. +# contrasts_matrix(T, v::Union{CategoricalArray, NullableCategoricalArray}). +# The rest is boilerplate. for contrastType in [:DummyCoding, :EffectsCoding, :HelmertCoding] @eval begin type $contrastType <: AbstractContrasts diff --git a/src/statsmodels/formula.jl b/src/statsmodels/formula.jl index 40f1cdb949..79777a5cab 100644 --- a/src/statsmodels/formula.jl +++ b/src/statsmodels/formula.jl @@ -40,6 +40,8 @@ type Terms intercept::Bool # is there an intercept column in the model matrix? end +Base.:(==)(t1::Terms, t2::Terms) = all(getfield(t1, f)==getfield(t2, f) for f in fieldnames(t1)) + type ModelFrame df::AbstractDataFrame terms::Terms @@ -85,19 +87,26 @@ function dospecials(ex::Expr) if !(a1 in specials) return ex end excp = copy(ex) excp.args = vcat(a1,map(dospecials, ex.args[2:end])) - if a1 != :* return excp end - aa = excp.args - a2 = aa[2] - a3 = aa[3] - if length(aa) > 3 - excp.args = vcat(a1, aa[3:end]) - a3 = dospecials(excp) + if a1 == :- + a2, a3 = excp.args[2:3] + a3 == 1 || error("invalid expression $ex; subtraction only supported for -1") + return :($a2 + -1) + elseif a1 == :* + aa = excp.args + a2 = aa[2] + a3 = aa[3] + if length(aa) > 3 + excp.args = vcat(a1, aa[3:end]) + a3 = dospecials(excp) + end + ## this order of expansion gives the R-style ordering of interaction + ## terms (after sorting in increasing interaction order) for higher- + ## order interaction terms (e.g. 
x1 * x2 * x3 should expand to x1 + + ## x2 + x3 + x1&x2 + x1&x3 + x2&x3 + x1&x2&x3) + :($a2 + $a2 & $a3 + $a3) + else + excp end - ## this order of expansion gives the R-style ordering of interaction - ## terms (after sorting in increasing interaction order) for higher- - ## order interaction terms (e.g. x1 * x2 * x3 should expand to x1 + - ## x2 + x3 + x1&x2 + x1&x3 + x2&x3 + x1&x2&x3) - :($a2 + $a2 & $a3 + $a3) end dospecials(a::Any) = a @@ -216,27 +225,16 @@ function Terms(f::Formula) Terms(tt, ev, facs, non_redundants, oo, haslhs, !any(noint)) end -## Default NA handler. Others can be added as keyword arguments -function na_omit(df::DataFrame) +## Default NULL handler. Others can be added as keyword arguments +function null_omit(df::DataFrame) cc = complete_cases(df) df[cc,:], cc end -## Trim the pool field of da to only those levels that occur in the refs -function dropunusedlevels!(da::PooledDataArray) - rr = da.refs - uu = unique(rr) - length(uu) == length(da.pool) && return da - T = eltype(rr) - su = sort!(uu) - dict = Dict(zip(su, one(T):convert(T, length(uu)))) - da.refs = map(x -> dict[x], rr) - da.pool = da.pool[uu] - da -end -dropunusedlevels!(x) = x +_droplevels!(x::Any) = x +_droplevels!(x::Union{CategoricalArray, NullableCategoricalArray}) = droplevels!(x) -is_categorical(::PooledDataArray) = true +is_categorical(::Union{CategoricalArray, NullableCategoricalArray}) = true is_categorical(::Any) = false ## Check for non-redundancy of columns. 
For instance, if x is a factor with two @@ -285,16 +283,11 @@ end const DEFAULT_CONTRASTS = DummyCoding -function ModelFrame(trms::Terms, d::AbstractDataFrame; - contrasts::Dict = Dict()) - df, msng = na_omit(DataFrame(map(x -> d[x], trms.eterms))) - names!(df, convert(Vector{Symbol}, map(string, trms.eterms))) - for c in eachcol(df) dropunusedlevels!(c[2]) end - - ## Set up contrasts: - ## Combine actual DF columns and contrast types if necessary to compute the - ## actual contrasts matrices, levels, and term names (using DummyCoding - ## as the default) +## Set up contrasts: +## Combine actual DF columns and contrast types if necessary to compute the +## actual contrasts matrices, levels, and term names (using DummyCoding +## as the default) +function evalcontrasts(df::AbstractDataFrame, contrasts::Dict = Dict()) evaledContrasts = Dict() for (term, col) in eachcol(df) is_categorical(col) || continue @@ -303,6 +296,16 @@ function ModelFrame(trms::Terms, d::AbstractDataFrame; DEFAULT_CONTRASTS(), col) end + return evaledContrasts +end + +function ModelFrame(trms::Terms, d::AbstractDataFrame; + contrasts::Dict = Dict()) + df, msng = null_omit(DataFrame(map(x -> d[x], trms.eterms))) + names!(df, convert(Vector{Symbol}, map(string, trms.eterms))) + for c in eachcol(df) _droplevels!(c[2]) end + + evaledContrasts = evalcontrasts(df, contrasts) ## Check for non-redundant terms, modifying terms in place check_non_redundancy!(trms, df) @@ -310,6 +313,7 @@ function ModelFrame(trms::Terms, d::AbstractDataFrame; ModelFrame(df, trms, msng, evaledContrasts) end +ModelFrame(df::AbstractDataFrame, term::Terms, msng::BitArray) = ModelFrame(df, term, msng, evalcontrasts(df)) ModelFrame(f::Formula, d::AbstractDataFrame; kwargs...) = ModelFrame(Terms(f), d; kwargs...) ModelFrame(ex::Expr, d::AbstractDataFrame; kwargs...) = ModelFrame(Formula(ex), d; kwargs...) 
@@ -348,8 +352,11 @@ function modelmat_cols{T<:AbstractFloatMatrix}(::Type{T}, name::Symbol, mf::Mode end end -modelmat_cols{T<:AbstractFloatMatrix}(::Type{T}, v::DataVector) = convert(T, reshape(v.data, length(v), 1)) -modelmat_cols{T<:AbstractFloatMatrix}(::Type{T}, v::Vector) = convert(T, reshape(v, length(v), 1)) +modelmat_cols{T<:AbstractFloatMatrix}(::Type{T}, v::AbstractVector) = + convert(T, reshape(v, length(v), 1)) +# FIXME: this inefficient method should not be needed, cf. JuliaLang/julia#18264 +modelmat_cols{T<:AbstractFloatMatrix}(::Type{T}, v::NullableVector) = + convert(T, Matrix(reshape(v, length(v), 1))) """ modelmat_cols{T<:AbstractFloatMatrix}(::Type{T}, v::PooledDataVector, contrast::ContrastsMatrix) @@ -357,16 +364,21 @@ modelmat_cols{T<:AbstractFloatMatrix}(::Type{T}, v::Vector) = convert(T, reshape Construct `ModelMatrix` columns of type `T` based on specified contrasts, ensuring that levels align properly. """ -function modelmat_cols{T<:AbstractFloatMatrix}(::Type{T}, v::PooledDataVector, contrast::ContrastsMatrix) +function modelmat_cols{T<:AbstractFloatMatrix}(::Type{T}, + v::Union{CategoricalVector, NullableCategoricalVector}, + contrast::ContrastsMatrix) ## make sure the levels of the contrast matrix and the categorical data ## are the same by constructing a re-indexing vector. 
Indexing into ## reindex with v.refs will give the corresponding row number of the ## contrast matrix reindex = [findfirst(contrast.levels, l) for l in levels(v)] contrastmatrix = convert(T, contrast.matrix) - return contrastmatrix[reindex[v.refs], :] + return indexrows(contrastmatrix, reindex[v.refs]) end +indexrows(m::SparseMatrixCSC, ind::Vector{Int}) = m'[:, ind]' +indexrows(m::AbstractMatrix, ind::Vector{Int}) = m[ind, :] + """ expandcols{T<:AbstractFloatMatrix}(trm::Vector{T}) Create pairwise products of columns from a vector of matrices @@ -421,7 +433,6 @@ function dropresponse!(trms::Terms) end end - """ ModelMatrix{T<:AbstractFloatMatrix}(mf::ModelFrame) Create a `ModelMatrix` of type `T` (default `Matrix{Float64}`) from the @@ -503,7 +514,8 @@ ModelMatrix(mf::ModelFrame) = ModelMatrix{Matrix{Float64}}(mf) termnames(term::Symbol, col) Returns a vector of strings with the names of the coefficients associated with a term. If the column corresponding to the term -is not a `PooledDataArray` a one-element vector is returned. +is not a `CategoricalArray` or `NullableCategoricalArray`, +a one-element vector is returned. """ termnames(term::Symbol, col) = [string(term)] function termnames(term::Symbol, mf::ModelFrame; non_redundant::Bool = false) diff --git a/src/statsmodels/statsmodel.jl b/src/statsmodels/statsmodel.jl index 3424dc2c97..3dd5768d35 100644 --- a/src/statsmodels/statsmodel.jl +++ b/src/statsmodels/statsmodel.jl @@ -62,7 +62,7 @@ typealias DataFrameModels @compat(Union{DataFrameStatisticalModel, DataFrameRegr @delegate DataFrameModels.model [StatsBase.coef, StatsBase.confint, StatsBase.deviance, StatsBase.nulldeviance, StatsBase.loglikelihood, StatsBase.nullloglikelihood, - StatsBase.df, StatsBase.df_residual, StatsBase.nobs, + StatsBase.dof, StatsBase.dof_residual, StatsBase.nobs, StatsBase.stderr, StatsBase.vcov] @delegate DataFrameRegressionModel.model [StatsBase.residuals, StatsBase.model_response, StatsBase.predict, StatsBase.predict!] 
@@ -81,7 +81,7 @@ function StatsBase.predict(mm::DataFrameRegressionModel, df::AbstractDataFrame; mf = ModelFrame(newTerms, df; contrasts = mm.mf.contrasts) newX = ModelMatrix(mf).m yp = predict(mm, newX; kwargs...) - out = DataArray(eltype(yp), size(df, 1)) + out = NullableArray(eltype(yp), size(df, 1)) out[mf.msng] = yp return(out) end diff --git a/test/REQUIRE b/test/REQUIRE index 22fc9ebb18..84bc366b4d 100644 --- a/test/REQUIRE +++ b/test/REQUIRE @@ -2,3 +2,4 @@ Compat 0.9.0 DataStructures RDatasets # can be removed when deprecated.jl doesn't test read_rda anymore RData +LaTeXStrings diff --git a/test/cat.jl b/test/cat.jl index af45afd77b..1ec8cc2b4a 100644 --- a/test/cat.jl +++ b/test/cat.jl @@ -6,13 +6,13 @@ module TestCat # hcat # - dvint = @data([1, 2, NA, 4]) - dvstr = @data(["one", "two", NA, "four"]) + nvint = NullableArray(Nullable{Int}[1, 2, Nullable(), 4]) + nvstr = NullableArray(Nullable{String}["one", "two", Nullable(), "four"]) - df2 = DataFrame(Any[dvint, dvstr]) - df3 = DataFrame(Any[dvint]) + df2 = DataFrame(Any[nvint, nvstr]) + df3 = DataFrame(Any[nvint]) df4 = convert(DataFrame, [1:4 1:4]) - df5 = DataFrame(Any[@data([1,2,3,4]), dvstr]) + df5 = DataFrame(Any[NullableArray([1,2,3,4]), nvstr]) dfh = hcat(df3, df4) @test size(dfh, 2) == 3 @@ -38,18 +38,40 @@ module TestCat # Assignment of rows df[1, :] = df[1, :] df[1:2, :] = df[1:2, :] + df[[true,false,false,true], :] = df[2:3, :] - # Broadcasting assignment of rows + # Scalar broadcasting assignment of rows df[1, :] = 1 + df[1:2, :] = 1 + df[[true,false,false,true], :] = 3 + + # Vector broadcasting assignment of rows + df[1:2, :] = [2,3] + df[[true,false,false,true], :] = [2,3] # Assignment of columns df[1] = zeros(4) + df[:, 2] = ones(4) # Broadcasting assignment of columns df[:, 1] = 1 df[1] = 3 df[:x3] = 2 + # assignment of subframes + df[1, 1:2] = df[2, 2:3] + df[1:2, 1:2] = df[2:3, 2:3] + df[[true,false,false,true], 2:3] = df[1:2,1:2] + + # scalar broadcasting assignment of subframes + 
df[1, 1:2] = 3 + df[1:2, 1:2] = 3 + df[[true,false,false,true], 2:3] = 3 + + # vector broadcasting assignment of subframes + df[1:2, 1:2] = [3,2] + df[[true,false,false,true], 2:3] = [2,3] + vcat([]) vcat(null_df) vcat(null_df, null_df) @@ -78,37 +100,56 @@ module TestCat dfr = vcat(df2, df3) @test size(dfr) == (8,2) @test names(df2) == names(dfr) - @test isna(dfr[8,:x2]) + @test isnull(dfr[8,:x2]) # Eltype promotion - @test eltypes(vcat(DataFrame(a = [1]), DataFrame(a = [2.1]))) == [Float64] - @test eltypes(vcat(DataFrame(a = [NA]), DataFrame(a = [2.1]))) == [Float64] + # Fails on Julia 0.4 since promote_type(Nullable{Int}, Nullable{Float64}) gives Nullable{T} + if VERSION >= v"0.5.0-dev" + @test eltypes(vcat(DataFrame(a = [1]), DataFrame(a = [2.1]))) == [Nullable{Float64}] + @test eltypes(vcat(DataFrame(a = NullableArray(Int, 1)), DataFrame(a = [2.1]))) == [Nullable{Float64}] + else + @test eltypes(vcat(DataFrame(a = [1]), DataFrame(a = [2.1]))) == [Nullable{Any}] + @test eltypes(vcat(DataFrame(a = NullableArray(Int, 1)), DataFrame(a = [2.1]))) == [Nullable{Any}] + end # Minimal container type promotion - dfa = DataFrame(a = @pdata([1, 2, 2])) - dfb = DataFrame(a = @pdata([2, 3, 4])) - dfc = DataFrame(a = @data([2, 3, 4])) + dfa = DataFrame(a = CategoricalArray([1, 2, 2])) + dfb = DataFrame(a = CategoricalArray([2, 3, 4])) + dfc = DataFrame(a = NullableArray([2, 3, 4])) dfd = DataFrame(Any[2:4], [:a]) - @test vcat(dfa, dfb)[:a] == @pdata([1, 2, 2, 2, 3, 4]) - @test vcat(dfa, dfc)[:a] == @pdata([1, 2, 2, 2, 3, 4]) + dfe = DataFrame(b = CategoricalArray([2, 3, 4])) + dfab = vcat(dfa, dfb) + dfac = vcat(dfa, dfc) + dfabcd = vcat(dfa, dfc, dfe) + @test isequal(dfab[:a], Nullable{Int}[1, 2, 2, 2, 3, 4]) + @test isequal(dfac[:a], Nullable{Int}[1, 2, 2, 2, 3, 4]) + @test isa(dfab[:a], NullableCategoricalVector{Int}) + @test isa(dfabcd[:a], NullableCategoricalVector{Int}) + @test isa(dfabcd[:b], NullableCategoricalVector{Int}) + # Fails on Julia 0.4 since 
promote_type(Nullable{Int}, Nullable{Float64}) gives Nullable{T} + if VERSION >= v"0.5.0-dev" + @test isa(dfac[:a], NullableCategoricalVector{Int}) + else + @test isa(dfac[:a], NullableCategoricalVector{Any}) + end # ^^ container may flip if container promotion happens in Base/DataArrays dc = vcat(dfd, dfc) - @test vcat(dfc, dfd) == dc + @test isequal(vcat(dfc, dfd), dc) # Zero-row DataFrames dfc0 = similar(dfc, 0) - @test vcat(dfd, dfc0, dfc) == dc + @test isequal(vcat(dfd, dfc0, dfc), dc) @test eltypes(vcat(dfd, dfc0)) == eltypes(dc) # Missing columns rename!(dfd, :a, :b) - dfda = DataFrame(b = @data([2, 3, 4, NA, NA, NA]), - a = @pdata([NA, NA, NA, 1, 2, 2])) + dfda = DataFrame(b = NullableArray(Nullable{Int}[2, 3, 4, Nullable(), Nullable(), Nullable()]), + a = NullableCategoricalVector(Nullable{Int}[Nullable(), Nullable(), Nullable(), 1, 2, 2])) @test isequal(vcat(dfd, dfa), dfda) # Alignment @test isequal(vcat(dfda, dfd, dfa), vcat(dfda, dfda)) # vcat should be able to concatenate different implementations of AbstractDataFrame (PR #944) - @test vcat(sub(DataFrame(A=1:3),2),DataFrame(A=4:5)) == DataFrame(A=[2,4,5]) + @test isequal(vcat(sub(DataFrame(A=1:3),2),DataFrame(A=4:5)), DataFrame(A=[2,4,5])) end diff --git a/test/constructors.jl b/test/constructors.jl index cba2e4eeed..600e067e25 100644 --- a/test/constructors.jl +++ b/test/constructors.jl @@ -10,27 +10,23 @@ module TestConstructors @test isequal(df.columns, Any[]) @test isequal(df.colindex, Index()) - df = DataFrame(Any[data(zeros(3)), data(ones(3))], + df = DataFrame(Any[NullableCategoricalVector(zeros(3)), + NullableCategoricalVector(ones(3))], Index([:x1, :x2])) @test size(df, 1) == 3 @test size(df, 2) == 2 - @test isequal(df, - DataFrame(Any[data(zeros(3)), data(ones(3))])) - @test isequal(df, - DataFrame(x1 = [0.0, 0.0, 0.0], - x2 = [1.0, 1.0, 1.0])) + @test isequal(df, DataFrame(Any[NullableCategoricalVector(zeros(3)), + NullableCategoricalVector(ones(3))])) + @test isequal(df, DataFrame(x1 = 
[0.0, 0.0, 0.0], + x2 = [1.0, 1.0, 1.0])) df2 = convert(DataFrame, [0.0 1.0; 0.0 1.0; 0.0 1.0]) names!(df2, [:x1, :x2]) - @test isequal(df, df2) - - @test isequal(df, - convert(DataFrame, [0.0 1.0; - 0.0 1.0; - 0.0 1.0])) + @test isequal(df[:x1], NullableArray(df2[:x1])) + @test isequal(df[:x2], NullableArray(df2[:x2])) @test isequal(df, DataFrame(x1 = [0.0, 0.0, 0.0], x2 = [1.0, 1.0, 1.0])) @@ -40,15 +36,12 @@ module TestConstructors df = DataFrame(Int, 2, 2) @test size(df) == (2, 2) - @test all(eltypes(df) .== [Int, Int]) + @test eltypes(df) == [Nullable{Int}, Nullable{Int}] df = DataFrame([Int, Float64], [:x1, :x2], 2) @test size(df) == (2, 2) - @test all(eltypes(df) .== Any[Int, Float64]) + @test eltypes(df) == [Nullable{Int}, Nullable{Float64}] @test isequal(df, DataFrame([Int, Float64], 2)) - - - end diff --git a/test/contrasts.jl b/test/contrasts.jl index 1ff2fed934..0a6b76671b 100644 --- a/test/contrasts.jl +++ b/test/contrasts.jl @@ -4,7 +4,7 @@ using Base.Test using DataFrames -d = DataFrame(x = @pdata( [:a, :b, :c, :a, :a, :b] )) +d = DataFrame(x = CategoricalVector([:a, :b, :c, :a, :a, :b])) mf = ModelFrame(Formula(nothing, :x), d) @@ -75,7 +75,7 @@ setcontrasts!(mf, x = HelmertCoding()) @test_throws ArgumentError setcontrasts!(mf, x = EffectsCoding(levels = ["a", "b", "c"])) # Missing data is handled gracefully, dropping columns when a level is lost -d[3, :x] = NA +d[3, :x] = Nullable() mf_missing = ModelFrame(Formula(nothing, :x), d, contrasts = Dict(:x => EffectsCoding())) @test ModelMatrix(mf_missing).m == [1 -1 1 1 diff --git a/test/conversions.jl b/test/conversions.jl index adf1067c69..1a607b2cac 100644 --- a/test/conversions.jl +++ b/test/conversions.jl @@ -7,14 +7,17 @@ module TestConversions df[:A] = 1:5 df[:B] = [:A, :B, :C, :D, :E] @test isa(convert(Array, df), Matrix{Any}) - @test convert(Array, df) == convert(Array, convert(DataArray, df)) + @test convert(Array, df) == convert(Array, convert(NullableArray, df)) @test isa(convert(Array{Any}, 
df), Matrix{Any}) df = DataFrame() df[:A] = 1:5 df[:B] = 1.0:5.0 - @test isa(convert(Array, df), Matrix{Real}) - @test convert(Array, df) == convert(Array, convert(DataArray, df)) + # Fails on Julia 0.4 since promote_type(Nullable{Int}, Nullable{Float64}) gives Nullable{T} + if VERSION >= v"0.5.0-dev" + @test isa(convert(Array, df), Matrix{Float64}) + end + @test convert(Array, df) == convert(Array, convert(NullableArray, df)) @test isa(convert(Array{Any}, df), Matrix{Any}) @test isa(convert(Array{Float64}, df), Matrix{Float64}) @@ -25,24 +28,24 @@ module TestConversions aa = convert(Array{Any}, df) ai = convert(Array{Int}, df) @test isa(a, Matrix{Float64}) - @test a == convert(Array, convert(DataArray, df)) + @test a == convert(Array, convert(NullableArray, df)) @test a == convert(Matrix, df) @test isa(aa, Matrix{Any}) @test aa == convert(Matrix{Any}, df) @test isa(ai, Matrix{Int}) @test ai == convert(Matrix{Int}, df) - df[1,1] = NA + df[1,1] = Nullable() @test_throws ErrorException convert(Array, df) - da = convert(DataArray, df) - daa = convert(DataArray{Any}, df) - dai = convert(DataArray{Int}, df) - @test isa(da, DataMatrix{Float64}) - @test isequal(da, convert(DataMatrix, df)) - @test isa(daa, DataMatrix{Any}) - @test isequal(daa, convert(DataMatrix{Any}, df)) - @test isa(dai, DataMatrix{Int}) - @test isequal(dai, convert(DataMatrix{Int}, df)) + na = convert(NullableArray, df) + naa = convert(NullableArray{Any}, df) + nai = convert(NullableArray{Int}, df) + @test isa(na, NullableMatrix{Float64}) + @test isequal(na, convert(NullableMatrix, df)) + @test isa(naa, NullableMatrix{Any}) + @test isequal(naa, convert(NullableMatrix{Any}, df)) + @test isa(nai, NullableMatrix{Int}) + @test isequal(nai, convert(NullableMatrix{Int}, df)) a = [1.0,2.0] b = [-0.1,3] @@ -52,25 +55,25 @@ module TestConversions df = convert(DataFrame,di) @test isa(df,DataFrame) @test names(df) == Symbol[x for x in sort(collect(keys(di)))] - @test df[:a] == a - @test df[:b] == b - @test df[:c] 
== c + @test isequal(df[:a], NullableArray(a)) + @test isequal(df[:b], NullableArray(b)) + @test isequal(df[:c], NullableArray(c)) od = OrderedDict("c"=>c, "a"=>a, "b"=>b) df = convert(DataFrame,od) @test isa(df, DataFrame) @test names(df) == Symbol[x for x in keys(od)] - @test df[:a] == a - @test df[:b] == b - @test df[:c] == c + @test isequal(df[:a], NullableArray(a)) + @test isequal(df[:b], NullableArray(b)) + @test isequal(df[:c], NullableArray(c)) sd = SortedDict("c"=>c, "a"=>a, "b"=>b) df = convert(DataFrame,sd) @test isa(df, DataFrame) @test names(df) == Symbol[x for x in keys(sd)] - @test df[:a] == a - @test df[:b] == b - @test df[:c] == c + @test isequal(df[:a], NullableArray(a)) + @test isequal(df[:b], NullableArray(b)) + @test isequal(df[:c], NullableArray(c)) a = [1.0] di = Dict("a"=>a, "b"=>b, "c"=>c) diff --git a/test/data.jl b/test/data.jl index d0fca036fe..1fc3f217cb 100644 --- a/test/data.jl +++ b/test/data.jl @@ -4,24 +4,24 @@ module TestData using DataFrames using Compat - #test_group("DataVector creation") - dvint = @data([1, 2, NA, 4]) - dvint2 = data([5:8;]) - dvint3 = data(5:8) - dvflt = @data([1.0, 2, NA, 4]) - dvstr = @data(["one", "two", NA, "four"]) - dvdict = DataArray(Dict, 4) # for issue #199 + #test_group("NullableArray creation") + nvint = NullableArray(Nullable{Int}[1, 2, Nullable(), 4]) + nvint2 = NullableArray(5:8) + nvint3 = NullableArray(5:8) + nvflt = NullableArray(Nullable{Float64}[1.0, 2.0, Nullable(), 4.0]) + nvstr = NullableArray(Nullable{Compat.ASCIIString}["one", "two", Nullable(), "four"]) + dvdict = NullableArray(Dict, 4) # for issue #199 #test_group("constructors") - df1 = DataFrame(Any[dvint, dvstr], [:Ints, :Strs]) - df2 = DataFrame(Any[dvint, dvstr]) - df3 = DataFrame(Any[dvint]) + df1 = DataFrame(Any[nvint, nvstr], [:Ints, :Strs]) + df2 = DataFrame(Any[nvint, nvstr]) + df3 = DataFrame(Any[nvint]) df4 = convert(DataFrame, [1:4 1:4]) - df5 = DataFrame(Any[@data([1,2,3,4]), dvstr]) - df6 = DataFrame(Any[dvint, dvint, 
dvstr], [:A, :B, :C]) - df7 = DataFrame(x = dvint, y = dvstr) + df5 = DataFrame(Any[NullableArray([1,2,3,4]), nvstr]) + df6 = DataFrame(Any[nvint, nvint, nvstr], [:A, :B, :C]) + df7 = DataFrame(x = nvint, y = nvstr) @test size(df7) == (4, 2) - @test isequal(df7[:x], dvint) + @test isequal(df7[:x], nvint) #test_group("description functions") @test size(df6, 1) == 4 @@ -31,10 +31,10 @@ module TestData @test names(df7) == [:x, :y] #test_group("ref") - @test df6[2, 3] == "two" - @test isna(df6[3, 3]) - @test df6[2, :C] == "two" - @test isequal(df6[:B], dvint) + @test isequal(df6[2, 3], Nullable("two")) + @test isnull(df6[3, 3]) + @test isequal(df6[2, :C], Nullable("two")) + @test isequal(df6[:B], nvint) @test size(df6[[2,3]], 2) == 2 @test size(df6[2,:], 1) == 1 @test size(df6[[1, 3], [1, 3]]) == (2, 2) @@ -43,17 +43,17 @@ module TestData # lots more to do #test_group("assign") - df6[3] = @data(["un", "deux", "troix", "quatre"]) - @test df6[1, 3] == "un" + df6[3] = NullableArray(["un", "deux", "troix", "quatre"]) + @test isequal(df6[1, 3], Nullable("un")) df6[:B] = [4, 3, 2, 1] - @test df6[1,2] == 4 + @test isequal(df6[1,2], Nullable(4)) df6[:D] = [true, false, true, false] - @test df6[1,4] == true + @test isequal(df6[1,4], Nullable(true)) delete!(df6, :D) @test names(df6) == [:A, :B, :C] @test size(df6, 2) == 3 - #test_group("NA handling") + #test_group("null handling") @test nrow(df5[complete_cases(df5), :]) == 3 #test_context("SubDataFrames") @@ -68,7 +68,7 @@ module TestData @test size(sdf6d) == (2,1) #test_group("ref") - @test sdf6a[1,2] == 4 + @test isequal(sdf6a[1,2], Nullable(4)) #test_context("Within") #test_group("Associative") @@ -77,35 +77,37 @@ module TestData srand(1) N = 20 #Cast to Int64 as rand() behavior differs between Int32/64 - d1 = pdata(rand(@compat(map(Int64, 1:2)), N)) - d2 = (@pdata ["A", "B", NA])[rand(@compat(map(Int64, 1:3)), N)] - d3 = data(randn(N)) - d4 = data(randn(N)) + d1 = NullableArray(rand(map(Int64, 1:2), N)) + d2 = 
NullableCategoricalArray(Nullable{String}["A", "B", Nullable()])[rand(map(Int64, 1:3), N)] + d3 = NullableArray(randn(N)) + d4 = NullableArray(randn(N)) df7 = DataFrame(Any[d1, d2, d3], [:d1, :d2, :d3]) #test_group("groupby") gd = groupby(df7, :d1) @test length(gd) == 2 - # @test isequal(gd[2]["d2"], PooledDataVector["A", "B", NA, "A", NA, NA, NA, NA]) - @test sum(gd[2][:d3]) == sum(df7[:d3][dropna(df7[:d1] .== 2)]) + # @test isequal(gd[2]["d2"], CategoricalVector["A", "B", Nullable(), "A", Nullable(), Nullable(), Nullable(), Nullable()]) + @test isequal(sum(gd[2][:d3]), sum(df7[:d3][Vector(df7[:d1]) .== 2])) g1 = groupby(df7, [:d1, :d2]) g2 = groupby(df7, [:d2, :d1]) - @test sum(g1[1][:d3]) == sum(g2[1][:d3]) + @test isequal(sum(g1[1][:d3]), sum(g2[1][:d3])) - res = 0.0 + res = Nullable(0.0) for x in g1 res += sum(x[:d1]) end - @test res == sum(df7[:d1]) + @test isequal(res, sum(df7[:d1])) + + @test aggregate(DataFrame(a=1), identity) == DataFrame(a_identity=1) df8 = aggregate(df7[[1, 3]], sum) - @test df8[1, :d1_sum] == sum(df7[:d1]) + @test isequal(df8[1, :d1_sum], sum(df7[:d1])) df8 = aggregate(df7, :d2, [sum, length]) @test size(df8, 1) == 3 @test size(df8, 2) == 5 - @test df8[2, :d1_length] == 4 + @test isequal(df8[2, :d1_length], Nullable(4)) @test isequal(df8, aggregate(groupby(df7, :d2), [sum, length])) df9 = df7 |> groupby([:d2]) |> [sum, length] @@ -189,11 +191,17 @@ module TestData v2 = randn(5)) m1 = join(df1, df2, on = :a) - @test isequal(m1[:a], @data([1, 2, 3, 4, 5])) + @test isequal(m1[:a], NullableArray([1, 2, 3, 4, 5])) # TODO: Re-enable - # m2 = join(df1, df2, on = :a, kind = :outer) - # @test isequal(m2[:b2], DataVector["A", "B", "B", "B", "B", NA, NA, NA, NA, NA]) - # @test isequal(m2[:b2], DataVector["B", "B", "B", "C", "B", NA, NA, NA, NA, NA]) + m2 = join(df1, df2, on = :a, kind = :outer) + # @test isequal(m2[:b2], + # NullableArray(Nullable{String}["A", "B", "B", "B", "B", + # Nullable(), Nullable(), + # Nullable(), Nullable(), 
Nullable()])) + # @test isequal(m2[:b2], + # NullableArray(Nullable{String}["B", "B", "B", "C", "B", + # Nullable(), Nullable(), + # Nullable(), Nullable(), Nullable()])) df1 = DataFrame(a = [1, 2, 3], b = ["America", "Europe", "Africa"]) @@ -201,33 +209,33 @@ module TestData c = ["New World", "Old World", "New World"]) m1 = join(df1, df2, on = :a, kind = :inner) - @test isequal(m1[:a], @data([1, 2])) + @test isequal(m1[:a], NullableArray([1, 2])) m2 = join(df1, df2, on = :a, kind = :left) - @test isequal(m2[:a], @data([1, 2, 3])) + @test isequal(m2[:a], NullableArray([1, 2, 3])) m3 = join(df1, df2, on = :a, kind = :right) - @test isequal(m3[:a], @data([1, 2, 4])) + @test isequal(m3[:a], NullableArray([1, 2, 4])) m4 = join(df1, df2, on = :a, kind = :outer) - @test isequal(m4[:a], @data([1, 2, 3, 4])) + @test isequal(m4[:a], NullableArray([1, 2, 3, 4])) - # test with NAs (issue #185) + # test with nulls (issue #185) df1 = DataFrame() - df1[:A] = @data(["a", "b", "a", NA]) - df1[:B] = @data([1, 2, 1, 3]) + df1[:A] = NullableArray(Nullable{Compat.ASCIIString}["a", "b", "a", Nullable()]) + df1[:B] = NullableArray([1, 2, 1, 3]) df2 = DataFrame() - df2[:A] = @data(["a", NA, "c"]) - df2[:C] = @data([1, 2, 4]) + df2[:A] = NullableArray(Nullable{Compat.ASCIIString}["a", Nullable(), "c"]) + df2[:C] = NullableArray([1, 2, 4]) m1 = join(df1, df2, on = :A) @test size(m1) == (3,3) - @test isequal(m1[:A], @data([NA,"a","a"])) + @test isequal(m1[:A], NullableArray(Nullable{Compat.ASCIIString}[Nullable(),"a","a"])) m2 = join(df1, df2, on = :A, kind = :outer) @test size(m2) == (5,3) - @test isequal(m2[:A], @data([NA,"a","a","b","c"])) + @test isequal(m2[:A], NullableArray(Nullable{Compat.ASCIIString}[Nullable(),"a","a","b","c"])) srand(1) df1 = DataFrame( @@ -241,20 +249,23 @@ module TestData b = [:A,:B,:C][[1,1,1,2,3]], v2 = randn(5) ) - df2[1,:a] = NA + df2[1,:a] = Nullable() # # TODO: Restore this functionality # m1 = join(df1, df2, on = [:a,:b]) - # @test isequal(m1[:a], 
DataArray(["x", "x", "y", "y", fill("x", 5)])) + # @test isequal(m1[:a], NullableArray(["x", "x", "y", "y", fill("x", 5)])) # m2 = join(df1, df2, on = ["a","b"], kind = :outer) - # @test isequal(m2[10,:v2], NA) - # @test isequal(m2[:a], DataVector["x", "x", "y", "y", "x", "x", "x", "x", "x", "y", NA, "y"]) + # @test isequal(m2[10,:v2], Nullable()) + # @test isequal(m2[:a], + # NullableArray(Nullable{String}["x", "x", "y", "y", + # "x", "x", "x", "x", "x", "y", + # Nullable(), "y"]) srand(1) function spltdf(d) - d[:x1] = map(x -> x[1], d[:a]) - d[:x2] = map(x -> x[2], d[:a]) - d[:x3] = map(x -> x[3], d[:a]) + d[:x1] = map(x -> get(x)[1], d[:a]) + d[:x2] = map(x -> get(x)[2], d[:a]) + d[:x3] = map(x -> get(x)[3], d[:a]) d end df1 = DataFrame( @@ -272,39 +283,6 @@ module TestData # m2 = join(df1, df2, on = [:x1, :x2, :x3]) # @test isequal(sort(m1[:a]), sort(m2[:a])) - #test_group("New DataVector constructors") - dv = DataArray(Int, 5) - @test all(isna(dv)) - dv = DataArray(Float64, 5) - @test all(isna(dv)) - dv = @data(zeros(5)) - @test all(dv .== 0.0) - dv = @data(ones(5)) - @test all(dv .== 1.0) - - # No more NA corruption - dv = @data(ones(10_000)) - @test !any(isna(dv)) - - PooledDataArray(falses(2), falses(2)) - PooledDataArray(falses(2), trues(2)) - - # Test vectorized comparisons work for DataVector's and PooledDataVector's - @data([1, 2, NA]) .== 1 - @pdata([1, 2, NA]) .== 1 - @data(["1", "2", NA]) .== "1" - @pdata(["1", "2", NA]) .== "1" - - # Test unique() - #test_group("unique()") - # TODO: Restore this - # dv = DataArray(1:4) - # dv[4] = NA - # @test (1 in unique(dv)) - # @test (2 in unique(dv)) - # @test (3 in unique(dv)) - # @test (NA in unique(dv)) - # test nonunique() with extra argument df1 = DataFrame(a = ["a", "b", "a", "b", "a", "b"], b = 1:6, c = [1:3;1:3]) df = vcat(df1, df1) @@ -317,29 +295,16 @@ module TestData @test find(nonunique(df, 1)) == collect(3:12) # Test unique() with extra argument - @test unique(df) == df1 - @test unique(df, :) == 
df1 - @test unique(df, Colon()) == df1 - @test unique(df, 2:3) == df1 - @test unique(df, 3) == df1[1:3,:] - @test unique(df, [1, 3]) == df1 - @test unique(df, [:a, :c]) == df1 - @test unique(df, :a) == df1[1:2,:] + @test isequal(unique(df), df1) + @test isequal(unique(df, :), df1) + @test isequal(unique(df, Colon()), df1) + @test isequal(unique(df, 2:3), df1) + @test isequal(unique(df, 3), df1[1:3,:]) + @test isequal(unique(df, [1, 3]), df1) + @test isequal(unique(df, [:a, :c]), df1) + @test isequal(unique(df, :a), df1[1:2,:]) #test unique!() with extra argument unique!(df, [1, 3]) - @test df == df1 - - #test_group("find()") - dv = DataArray([true, false, true]) - @test isequal(find(dv), [1, 3]) - - pdv = PooledDataArray([true, false, true]) - @test isequal(find(pdv), [1, 3]) - - dv[1] = NA - @test isequal(find(dv), [3]) - - pdv[1] = NA - @test isequal(find(pdv), [3]) + @test isequal(df, df1) end diff --git a/test/data/iris.csv b/test/data/iris.csv new file mode 100644 index 0000000000..603349e022 --- /dev/null +++ b/test/data/iris.csv @@ -0,0 +1,151 @@ +"SepalLength","SepalWidth","PetalLength","PetalWidth","Species" +"5.1","3.5","1.4","0.2","setosa" +"4.9","3.0","1.4","0.2","setosa" +"4.7","3.2","1.3","0.2","setosa" +"4.6","3.1","1.5","0.2","setosa" +"5.0","3.6","1.4","0.2","setosa" +"5.4","3.9","1.7","0.4","setosa" +"4.6","3.4","1.4","0.3","setosa" +"5.0","3.4","1.5","0.2","setosa" +"4.4","2.9","1.4","0.2","setosa" +"4.9","3.1","1.5","0.1","setosa" +"5.4","3.7","1.5","0.2","setosa" +"4.8","3.4","1.6","0.2","setosa" +"4.8","3.0","1.4","0.1","setosa" +"4.3","3.0","1.1","0.1","setosa" +"5.8","4.0","1.2","0.2","setosa" +"5.7","4.4","1.5","0.4","setosa" +"5.4","3.9","1.3","0.4","setosa" +"5.1","3.5","1.4","0.3","setosa" +"5.7","3.8","1.7","0.3","setosa" +"5.1","3.8","1.5","0.3","setosa" +"5.4","3.4","1.7","0.2","setosa" +"5.1","3.7","1.5","0.4","setosa" +"4.6","3.6","1.0","0.2","setosa" +"5.1","3.3","1.7","0.5","setosa" +"4.8","3.4","1.9","0.2","setosa" 
+"5.0","3.0","1.6","0.2","setosa" +"5.0","3.4","1.6","0.4","setosa" +"5.2","3.5","1.5","0.2","setosa" +"5.2","3.4","1.4","0.2","setosa" +"4.7","3.2","1.6","0.2","setosa" +"4.8","3.1","1.6","0.2","setosa" +"5.4","3.4","1.5","0.4","setosa" +"5.2","4.1","1.5","0.1","setosa" +"5.5","4.2","1.4","0.2","setosa" +"4.9","3.1","1.5","0.2","setosa" +"5.0","3.2","1.2","0.2","setosa" +"5.5","3.5","1.3","0.2","setosa" +"4.9","3.6","1.4","0.1","setosa" +"4.4","3.0","1.3","0.2","setosa" +"5.1","3.4","1.5","0.2","setosa" +"5.0","3.5","1.3","0.3","setosa" +"4.5","2.3","1.3","0.3","setosa" +"4.4","3.2","1.3","0.2","setosa" +"5.0","3.5","1.6","0.6","setosa" +"5.1","3.8","1.9","0.4","setosa" +"4.8","3.0","1.4","0.3","setosa" +"5.1","3.8","1.6","0.2","setosa" +"4.6","3.2","1.4","0.2","setosa" +"5.3","3.7","1.5","0.2","setosa" +"5.0","3.3","1.4","0.2","setosa" +"7.0","3.2","4.7","1.4","versicolor" +"6.4","3.2","4.5","1.5","versicolor" +"6.9","3.1","4.9","1.5","versicolor" +"5.5","2.3","4.0","1.3","versicolor" +"6.5","2.8","4.6","1.5","versicolor" +"5.7","2.8","4.5","1.3","versicolor" +"6.3","3.3","4.7","1.6","versicolor" +"4.9","2.4","3.3","1.0","versicolor" +"6.6","2.9","4.6","1.3","versicolor" +"5.2","2.7","3.9","1.4","versicolor" +"5.0","2.0","3.5","1.0","versicolor" +"5.9","3.0","4.2","1.5","versicolor" +"6.0","2.2","4.0","1.0","versicolor" +"6.1","2.9","4.7","1.4","versicolor" +"5.6","2.9","3.6","1.3","versicolor" +"6.7","3.1","4.4","1.4","versicolor" +"5.6","3.0","4.5","1.5","versicolor" +"5.8","2.7","4.1","1.0","versicolor" +"6.2","2.2","4.5","1.5","versicolor" +"5.6","2.5","3.9","1.1","versicolor" +"5.9","3.2","4.8","1.8","versicolor" +"6.1","2.8","4.0","1.3","versicolor" +"6.3","2.5","4.9","1.5","versicolor" +"6.1","2.8","4.7","1.2","versicolor" +"6.4","2.9","4.3","1.3","versicolor" +"6.6","3.0","4.4","1.4","versicolor" +"6.8","2.8","4.8","1.4","versicolor" +"6.7","3.0","5.0","1.7","versicolor" +"6.0","2.9","4.5","1.5","versicolor" +"5.7","2.6","3.5","1.0","versicolor" 
+"5.5","2.4","3.8","1.1","versicolor" +"5.5","2.4","3.7","1.0","versicolor" +"5.8","2.7","3.9","1.2","versicolor" +"6.0","2.7","5.1","1.6","versicolor" +"5.4","3.0","4.5","1.5","versicolor" +"6.0","3.4","4.5","1.6","versicolor" +"6.7","3.1","4.7","1.5","versicolor" +"6.3","2.3","4.4","1.3","versicolor" +"5.6","3.0","4.1","1.3","versicolor" +"5.5","2.5","4.0","1.3","versicolor" +"5.5","2.6","4.4","1.2","versicolor" +"6.1","3.0","4.6","1.4","versicolor" +"5.8","2.6","4.0","1.2","versicolor" +"5.0","2.3","3.3","1.0","versicolor" +"5.6","2.7","4.2","1.3","versicolor" +"5.7","3.0","4.2","1.2","versicolor" +"5.7","2.9","4.2","1.3","versicolor" +"6.2","2.9","4.3","1.3","versicolor" +"5.1","2.5","3.0","1.1","versicolor" +"5.7","2.8","4.1","1.3","versicolor" +"6.3","3.3","6.0","2.5","virginica" +"5.8","2.7","5.1","1.9","virginica" +"7.1","3.0","5.9","2.1","virginica" +"6.3","2.9","5.6","1.8","virginica" +"6.5","3.0","5.8","2.2","virginica" +"7.6","3.0","6.6","2.1","virginica" +"4.9","2.5","4.5","1.7","virginica" +"7.3","2.9","6.3","1.8","virginica" +"6.7","2.5","5.8","1.8","virginica" +"7.2","3.6","6.1","2.5","virginica" +"6.5","3.2","5.1","2.0","virginica" +"6.4","2.7","5.3","1.9","virginica" +"6.8","3.0","5.5","2.1","virginica" +"5.7","2.5","5.0","2.0","virginica" +"5.8","2.8","5.1","2.4","virginica" +"6.4","3.2","5.3","2.3","virginica" +"6.5","3.0","5.5","1.8","virginica" +"7.7","3.8","6.7","2.2","virginica" +"7.7","2.6","6.9","2.3","virginica" +"6.0","2.2","5.0","1.5","virginica" +"6.9","3.2","5.7","2.3","virginica" +"5.6","2.8","4.9","2.0","virginica" +"7.7","2.8","6.7","2.0","virginica" +"6.3","2.7","4.9","1.8","virginica" +"6.7","3.3","5.7","2.1","virginica" +"7.2","3.2","6.0","1.8","virginica" +"6.2","2.8","4.8","1.8","virginica" +"6.1","3.0","4.9","1.8","virginica" +"6.4","2.8","5.6","2.1","virginica" +"7.2","3.0","5.8","1.6","virginica" +"7.4","2.8","6.1","1.9","virginica" +"7.9","3.8","6.4","2.0","virginica" +"6.4","2.8","5.6","2.2","virginica" 
+"6.3","2.8","5.1","1.5","virginica" +"6.1","2.6","5.6","1.4","virginica" +"7.7","3.0","6.1","2.3","virginica" +"6.3","3.4","5.6","2.4","virginica" +"6.4","3.1","5.5","1.8","virginica" +"6.0","3.0","4.8","1.8","virginica" +"6.9","3.1","5.4","2.1","virginica" +"6.7","3.1","5.6","2.4","virginica" +"6.9","3.1","5.1","2.3","virginica" +"5.8","2.7","5.1","1.9","virginica" +"6.8","3.2","5.9","2.3","virginica" +"6.7","3.3","5.7","2.5","virginica" +"6.7","3.0","5.2","2.3","virginica" +"6.3","2.5","5.0","1.9","virginica" +"6.5","3.0","5.2","2.0","virginica" +"6.2","3.4","5.4","2.3","virginica" +"5.9","3.0","5.1","1.8","virginica" diff --git a/test/dataframe.jl b/test/dataframe.jl index 6b36e801fb..2814d45765 100644 --- a/test/dataframe.jl +++ b/test/dataframe.jl @@ -7,23 +7,28 @@ module TestDataFrame # Equality # - @test isequal(DataFrame(a=@data([1, 2, 3]), b=@data([4, 5, 6])), DataFrame(a=@data([1, 2, 3]), b=@data([4, 5, 6]))) - @test !isequal(DataFrame(a=@data([1, 2]), b=@data([4, 5])), DataFrame(a=@data([1, 2, 3]), b=@data([4, 5, 6]))) - @test !isequal(DataFrame(a=@data([1, 2, 3]), b=@data([4, 5, 6])), DataFrame(a=@data([1, 2, 3]))) - @test !isequal(DataFrame(a=@data([1, 2, 3]), b=@data([4, 5, 6])), DataFrame(a=@data([1, 2, 3]), c=@data([4, 5, 6]))) - @test !isequal(DataFrame(a=@data([1, 2, 3]), b=@data([4, 5, 6])), DataFrame(b=@data([4, 5, 6]), a=@data([1, 2, 3]))) - @test !isequal(DataFrame(a=@data([1, 2, 2]), b=@data([4, 5, 6])), DataFrame(a=@data([1, 2, 3]), b=@data([4, 5, 6]))) - @test isequal(DataFrame(a=@data([1, 2, NA]), b=@data([4, 5, 6])), DataFrame(a=@data([1, 2, NA]), b=@data([4, 5, 6]))) - - @test DataFrame(a=@data([1, 2, 3]), b=@data([4, 5, 6])) == DataFrame(a=@data([1, 2, 3]), b=@data([4, 5, 6])) - @test DataFrame(a=@data([1, 2]), b=@data([4, 5])) != DataFrame(a=@data([1, 2, 3]), b=@data([4, 5, 6])) - @test DataFrame(a=@data([1, 2, 3]), b=@data([4, 5, 6])) != DataFrame(a=@data([1, 2, 3])) - @test DataFrame(a=@data([1, 2, 3]), b=@data([4, 5, 6])) != 
DataFrame(a=@data([1, 2, 3]), c=@data([4, 5, 6])) - @test DataFrame(a=@data([1, 2, 3]), b=@data([4, 5, 6])) != DataFrame(b=@data([4, 5, 6]), a=@data([1, 2, 3])) - @test DataFrame(a=@data([1, 2, 2]), b=@data([4, 5, 6])) != DataFrame(a=@data([1, 2, 3]), b=@data([4, 5, 6])) - @test DataFrame(a=@data([1, 3, NA]), b=@data([4, 5, 6])) != DataFrame(a=@data([1, 2, NA]), b=@data([4, 5, 6])) - @test isna(DataFrame(a=@data([1, 2, NA]), b=@data([4, 5, 6])) == DataFrame(a=@data([1, 2, NA]), b=@data([4, 5, 6]))) - @test isna(DataFrame(a=@data([1, 2, NA]), b=@data([4, 5, 6])) == DataFrame(a=@data([1, 2, 3]), b=@data([4, 5, 6]))) + @test isequal(DataFrame(a=[1, 2, 3], b=[4, 5, 6]), DataFrame(a=[1, 2, 3], b=[4, 5, 6])) + @test !isequal(DataFrame(a=[1, 2], b=[4, 5]), DataFrame(a=[1, 2, 3], b=[4, 5, 6])) + @test !isequal(DataFrame(a=[1, 2, 3], b=[4, 5, 6]), DataFrame(a=[1, 2, 3])) + @test !isequal(DataFrame(a=[1, 2, 3], b=[4, 5, 6]), DataFrame(a=[1, 2, 3], c=[4, 5, 6])) + @test !isequal(DataFrame(a=[1, 2, 3], b=[4, 5, 6]), DataFrame(b=[4, 5, 6], a=[1, 2, 3])) + @test !isequal(DataFrame(a=[1, 2, 2], b=[4, 5, 6]), DataFrame(a=[1, 2, 3], b=[4, 5, 6])) + @test isequal(DataFrame(a=Nullable{Int}[1, 2, Nullable()], b=[4, 5, 6]), + DataFrame(a=Nullable{Int}[1, 2, Nullable()], b=[4, 5, 6])) + + # FIXME: equality operators won't work until JuliaStats/NullableArrays#84 is merged + #@test get(DataFrame(a=[1, 2, 3], b=[4, 5, 6]) == DataFrame(a=[1, 2, 3], b=[4, 5, 6])) + #@test get(DataFrame(a=[1, 2], b=[4, 5]) != DataFrame(a=[1, 2, 3], b=[4, 5, 6])) + #@test get(DataFrame(a=[1, 2, 3], b=[4, 5, 6]) != DataFrame(a=[1, 2, 3])) + #@test get(DataFrame(a=[1, 2, 3], b=[4, 5, 6]) != DataFrame(a=[1, 2, 3], c=[4, 5, 6])) + #@test get(DataFrame(a=[1, 2, 3], b=[4, 5, 6]) != DataFrame(b=[4, 5, 6], a=[1, 2, 3])) + #@test get(DataFrame(a=[1, 2, 2], b=[4, 5, 6]) != DataFrame(a=[1, 2, 3], b=[4, 5, 6])) + #@test get(DataFrame(a=Nullable{Int}[1, 3, Nullable()], b=[4, 5, 6]) != + # DataFrame(a=Nullable{Int}[1, 2, 
Nullable()], b=[4, 5, 6])) + #@test isnull(DataFrame(a=Nullable{Int}[1, 2, Nullable()], b=[4, 5, 6]) == + # DataFrame(a=Nullable{Int}[1, 2, Nullable()], b=[4, 5, 6])) + #@test isnull(DataFrame(a=Nullable{Int}[1, 2, Nullable()], b=[4, 5, 6]) == + # DataFrame(a=Nullable{Int}[1, 2, 3], b=[4, 5, 6])) # # Copying @@ -34,17 +39,17 @@ module TestDataFrame dfdc = deepcopy(df) df[1, :a] = 4 - df[1, :b][:e] = 5 + get(df[1, :b])[:e] = 5 names!(df, [:f, :g]) @test names(dfc) == [:a, :b] @test names(dfdc) == [:a, :b] - @test dfc[1, :a] == 4 - @test dfdc[1, :a] == 2 + @test get(dfc[1, :a]) === 4 + @test get(dfdc[1, :a]) === 2 - @test names(dfc[1, :b]) == [:c, :e] - @test names(dfdc[1, :b]) == [:c] + @test names(get(dfc[1, :b])) == [:c, :e] + @test names(get(dfdc[1, :b])) == [:c] # @@ -64,18 +69,18 @@ module TestDataFrame # Insert single value x[:d] = 3 - @test x[:d] == [3, 3, 3] + @test isequal(x[:d], NullableArray([3, 3, 3])) x0[:d] = 3 @test x0[:d] == Int[] - # similar / nas - df = DataFrame(a = 1, b = "b", c = @pdata([3.3])) - nadf = DataFrame(a = @data(Int[NA, NA]), - b = DataArray(Array(String, 2), trues(2)), - c = @pdata(Float64[NA, NA])) - @test isequal(nadf, similar(df, 2)) - @test isequal(nadf, DataFrames.nas(df, 2)) + # similar / nulls + df = DataFrame(a = 1, b = "b", c = CategoricalArray([3.3])) + nulldf = DataFrame(a = NullableArray(Int, 2), + b = NullableArray(String, 2), + c = NullableCategoricalArray(Float64, 2)) + @test isequal(nulldf, similar(df, 2)) + @test isequal(nulldf, DataFrames.similar_nullable(df, 2)) # Associative methods @@ -93,58 +98,58 @@ module TestDataFrame df = DataFrame(a=[1, 2], b=[3., 4.]) @test_throws BoundsError insert!(df, 5, ["a", "b"], :newcol) @test_throws ErrorException insert!(df, 1, ["a"], :newcol) - @test insert!(df, 1, ["a", "b"], :newcol) == df - @test isequal(df, DataFrame(newcol=["a", "b"], a=[1, 2], b=[3., 4.])) - df = DataFrame(a=[1, 2], b=[3., 4.]) - @test insert!(df, 3, ["a", "b"], :newcol) == df - @test isequal(df, 
DataFrame(a=[1, 2], b=[3., 4.], newcol=["a", "b"])) + @test isequal(insert!(df, 1, ["a", "b"], :newcol), df) + @test names(df) == [:newcol, :a, :b] + @test isequal(df[:a], NullableArray([1, 2])) + @test isequal(df[:b], NullableArray([3., 4.])) + @test isequal(df[:newcol], ["a", "b"]) df = DataFrame(a=[1, 2], b=[3., 4.]) df2 = DataFrame(b=["a", "b"], c=[:c, :d]) - @test merge!(df, df2) == df + @test isequal(merge!(df, df2), df) @test isequal(df, DataFrame(a=[1, 2], b=["a", "b"], c=[:c, :d])) #test_group("Empty DataFrame constructors") df = DataFrame(Int, 10, 3) @test size(df, 1) == 10 @test size(df, 2) == 3 - @test typeof(df[:, 1]) == DataVector{Int} - @test typeof(df[:, 2]) == DataVector{Int} - @test typeof(df[:, 3]) == DataVector{Int} - @test allna(df[:, 1]) - @test allna(df[:, 2]) - @test allna(df[:, 3]) + @test typeof(df[:, 1]) == NullableVector{Int} + @test typeof(df[:, 2]) == NullableVector{Int} + @test typeof(df[:, 3]) == NullableVector{Int} + @test allnull(df[:, 1]) + @test allnull(df[:, 2]) + @test allnull(df[:, 3]) df = DataFrame(Any[Int, Float64, String], 100) @test size(df, 1) == 100 @test size(df, 2) == 3 - @test typeof(df[:, 1]) == DataVector{Int} - @test typeof(df[:, 2]) == DataVector{Float64} - @test typeof(df[:, 3]) == DataVector{String} - @test allna(df[:, 1]) - @test allna(df[:, 2]) - @test allna(df[:, 3]) + @test typeof(df[:, 1]) == NullableVector{Int} + @test typeof(df[:, 2]) == NullableVector{Float64} + @test typeof(df[:, 3]) == NullableVector{String} + @test allnull(df[:, 1]) + @test allnull(df[:, 2]) + @test allnull(df[:, 3]) df = DataFrame(Any[Int, Float64, String], [:A, :B, :C], 100) @test size(df, 1) == 100 @test size(df, 2) == 3 - @test typeof(df[:, 1]) == DataVector{Int} - @test typeof(df[:, 2]) == DataVector{Float64} - @test typeof(df[:, 3]) == DataVector{String} - @test allna(df[:, 1]) - @test allna(df[:, 2]) - @test allna(df[:, 3]) + @test typeof(df[:, 1]) == NullableVector{Int} + @test typeof(df[:, 2]) == NullableVector{Float64} + 
@test typeof(df[:, 3]) == NullableVector{String} + @test allnull(df[:, 1]) + @test allnull(df[:, 2]) + @test allnull(df[:, 3]) df = DataFrame(DataType[Int, Float64, Compat.UTF8String],[:A, :B, :C], [false,false,true],100) @test size(df, 1) == 100 @test size(df, 2) == 3 - @test typeof(df[:, 1]) == DataVector{Int} - @test typeof(df[:, 2]) == DataVector{Float64} - @test typeof(df[:, 3]) == PooledDataVector{Compat.UTF8String,UInt32} - @test allna(df[:, 1]) - @test allna(df[:, 2]) - @test allna(df[:, 3]) + @test typeof(df[:, 1]) == NullableVector{Int} + @test typeof(df[:, 2]) == NullableVector{Float64} + @test typeof(df[:, 3]) == NullableCategoricalVector{Compat.UTF8String,UInt32} + @test allnull(df[:, 1]) + @test allnull(df[:, 2]) + @test allnull(df[:, 3]) df = convert(DataFrame, zeros(10, 5)) @@ -168,8 +173,8 @@ module TestDataFrame @compat(Dict{Any,Any}(:a=>5))]) @test size(df, 1) == 3 @test size(df, 2) == 2 - @test typeof(df[:,:a]) == DataVector{Int} - @test typeof(df[:,:b]) == DataVector{Char} + @test typeof(df[:,:a]) == NullableVector{Int} + @test typeof(df[:,:b]) == NullableVector{Char} df = DataFrame([@compat(Dict{Any,Any}(:a=>1, :b=>'c')), @compat(Dict{Any,Any}(:a=>3, :b=>'d')), @@ -177,9 +182,10 @@ module TestDataFrame [:a, :b]) @test size(df, 1) == 3 @test size(df, 2) == 2 - @test typeof(df[:,:a]) == DataVector{Int} - @test typeof(df[:,:b]) == DataVector{Char} + @test typeof(df[:,:a]) == NullableVector{Int} + @test typeof(df[:,:b]) == NullableVector{Char} + @test DataFrame(NullableArray[[1,2,3],[2.5,4.5,6.5]], [:A, :B]) == DataFrame(A = [1,2,3], B = [2.5,4.5,6.5]) # This assignment was missing before df = DataFrame(Column = [:A]) @@ -201,11 +207,11 @@ module TestDataFrame dfb= DataFrame( first=[1,2], second=["apple","orange"] ) push!(dfb, Any[3,"pear"]) - @test df==dfb + @test isequal(df, dfb) dfb= DataFrame( first=[1,2], second=["apple","orange"] ) push!(dfb, (3,"pear")) - @test df==dfb + @test isequal(df, dfb) dfb= DataFrame( first=[1,2], 
second=["apple","orange"] ) @test_throws ArgumentError push!(dfb, (33.33,"pear")) @@ -215,22 +221,22 @@ module TestDataFrame dfb= DataFrame( first=[1,2], second=["apple","orange"] ) push!(dfb, @compat(Dict(:first=>3, :second=>"pear"))) - @test df==dfb + @test isequal(df, dfb) df=DataFrame( first=[1,2,3], second=["apple","orange","banana"] ) dfb= DataFrame( first=[1,2], second=["apple","orange"] ) push!(dfb, @compat(Dict("first"=>3, "second"=>"banana"))) - @test df==dfb + @test isequal(df, dfb) df0= DataFrame( first=[1,2], second=["apple","orange"] ) dfb= DataFrame( first=[1,2], second=["apple","orange"] ) @test_throws ArgumentError push!(dfb, @compat(Dict(:first=>true, :second=>false))) - @test df0==dfb + @test isequal(df0, dfb) df0= DataFrame( first=[1,2], second=["apple","orange"] ) dfb= DataFrame( first=[1,2], second=["apple","orange"] ) @test_throws ArgumentError push!(dfb, @compat(Dict("first"=>"chicken", "second"=>"stuff"))) - @test df0==dfb + @test isequal(df0, dfb) # delete! df = DataFrame(a=1, b=2, c=3, d=4, e=5) @@ -267,54 +273,61 @@ module TestDataFrame @test deleterows!(df, [2, 3]) === df @test isequal(df, DataFrame(a=[1], b=[3.])) - df = DataFrame(a=@data([1, 2]), b=@data([3., 4.])) + df = DataFrame(a=NullableArray([1, 2]), b=NullableArray([3., 4.])) @test deleterows!(df, 1) === df - @test isequal(df, DataFrame(a=@data([2]), b=@data([4.]))) + @test isequal(df, DataFrame(a=NullableArray([2]), b=NullableArray([4.]))) - df = DataFrame(a=@data([1, 2]), b=@data([3., 4.])) + df = DataFrame(a=NullableArray([1, 2]), b=NullableArray([3., 4.])) @test deleterows!(df, 2) === df - @test isequal(df, DataFrame(a=@data([1]), b=@data([3.]))) + @test isequal(df, DataFrame(a=NullableArray([1]), b=NullableArray([3.]))) - df = DataFrame(a=@data([1, 2, 3]), b=@data([3., 4., 5.])) + df = DataFrame(a=NullableArray([1, 2, 3]), b=NullableArray([3., 4., 5.])) @test deleterows!(df, 2:3) === df - @test isequal(df, DataFrame(a=@data([1]), b=@data([3.]))) + @test isequal(df, 
DataFrame(a=NullableArray([1]), b=NullableArray([3.]))) - df = DataFrame(a=@data([1, 2, 3]), b=@data([3., 4., 5.])) + df = DataFrame(a=NullableArray([1, 2, 3]), b=NullableArray([3., 4., 5.])) @test deleterows!(df, [2, 3]) === df - @test isequal(df, DataFrame(a=@data([1]), b=@data([3.]))) + @test isequal(df, DataFrame(a=NullableArray([1]), b=NullableArray([3.]))) # describe #suppress output and test that describe() does not throw devnull = is_unix() ? "/dev/null" : "nul" open(devnull, "w") do f - @test nothing == describe(f, DataFrame(a=[1, 2], b=Any["3", NA])) - @test nothing == describe(f, DataFrame(a=@data([1, 2]), b=@data(["3", NA]))) - @test nothing == describe(f, DataFrame(a=@pdata([1, 2]), b=@pdata(["3", NA]))) + @test nothing == describe(f, DataFrame(a=[1, 2], b=Any["3", Nullable()])) + @test nothing == + describe(f, DataFrame(a=NullableArray([1, 2]), + b=NullableArray(Nullable{String}["3", Nullable()]))) + @test nothing == + describe(f, DataFrame(a=CategoricalArray([1, 2]), + b=NullableCategoricalArray(Nullable{String}["3", Nullable()]))) @test nothing == describe(f, [1, 2, 3]) - @test nothing == describe(f, @data([1, 2, 3])) - @test nothing == describe(f, @pdata([1, 2, 3])) - @test nothing == describe(f, Any["1", "2", NA]) - @test nothing == describe(f, @data(["1", "2", NA])) - @test nothing == describe(f, @pdata(["1", "2", NA])) + @test nothing == describe(f, NullableArray([1, 2, 3])) + @test nothing == describe(f, CategoricalArray([1, 2, 3])) + @test nothing == describe(f, Any["1", "2", Nullable()]) + @test nothing == describe(f, NullableArray(Nullable{String}["1", "2", Nullable()])) + @test nothing == describe(f, NullableCategoricalArray(Nullable{String}["1", "2", Nullable()])) end #Check the output of unstack - df = DataFrame(Fish = ["Bob", "Bob", "Batman", "Batman"], - Key = ["Mass", "Color", "Mass", "Color"], - Value = ["12 g", "Red", "18 g", "Grey"]) + df = DataFrame(Fish = CategoricalArray(["Bob", "Bob", "Batman", "Batman"]), + Key = ["Mass", 
"Color", "Mass", "Color"], + Value = ["12 g", "Red", "18 g", "Grey"]) + # Check that reordering levels does not confuse unstack + levels!(df[1], ["XXX", "Bob", "Batman"]) #Unstack specifying a row column df2 = unstack(df,:Fish, :Key, :Value) #Unstack without specifying a row column df3 = unstack(df,:Key, :Value) #The expected output - df4 = DataFrame(Fish = ["Batman", "Bob"], Color = ["Grey", "Red"], Mass = ["18 g", "12 g"]) - @test df2 == df4 - @test df3 == df4 - #Make sure unstack works with NAs at the start of the value column - df[1,:Value] = NA + df4 = DataFrame(Fish = ["XXX", "Bob", "Batman"], + Color = Nullable{String}[Nullable(), "Red", "Grey"], + Mass = Nullable{String}[Nullable(), "12 g", "18 g"]) + @test isequal(df2, df4) + @test isequal(df3, df4[2:3, :]) + #Make sure unstack works with NULLs at the start of the value column + df[1,:Value] = Nullable() df2 = unstack(df,:Fish, :Key, :Value) #This changes the expected result - df4[2,:Mass] = NA + df4[2,:Mass] = Nullable() @test isequal(df2, df4) - end diff --git a/test/dataframerow.jl b/test/dataframerow.jl index 4fcc2c66e0..0b69555db6 100644 --- a/test/dataframerow.jl +++ b/test/dataframerow.jl @@ -2,12 +2,14 @@ module TestDataFrameRow using Base.Test using DataFrames, Compat - df = DataFrame(a=@data([1, 2, 3, 1, 2, 2 ]), - b=@data([2.0, NA, 1.2, 2.0, NA, NA]), - c=@data(["A", "B", "C", "A", "B", NA]), - d=PooledDataArray( - @data([:A, NA, :C, :A, NA, :C]))) - df2 = DataFrame(a = @data([1, 2, 3])) + df = DataFrame(a=NullableArray([1, 2, 3, 1, 2, 2 ]), + b=NullableArray(Nullable{Float64}[2.0, Nullable(), + 1.2, 2.0, + Nullable(), Nullable()]), + c=NullableArray(Nullable{String}["A", "B", "C", "A", "B", Nullable()]), + d=NullableCategoricalArray(Nullable{Symbol}[:A, Nullable(), :C, :A, + Nullable(), :C])) + df2 = DataFrame(a = NullableArray([1, 2, 3])) # # Equality diff --git a/test/duplicates.jl b/test/duplicates.jl index 848dded61c..5656cbbb51 100644 --- a/test/duplicates.jl +++ b/test/duplicates.jl @@ 
-9,10 +9,12 @@ module TestDuplicates unique!(df) @test isequal(df, udf) - pdf = DataFrame( a = PooledDataArray( @data ["a", "a", NA, NA, "b", NA, "a", NA] ), - b = PooledDataArray( @data ["a", "b", NA, NA, "b", "a", "a", "a"] ) ) - updf = DataFrame( a = PooledDataArray( @data ["a", "a", NA, "b", NA] ), - b = PooledDataArray( @data ["a", "b", NA, "b", "a"] ) ) + pdf = DataFrame(a = NullableCategoricalArray(Nullable{String}["a", "a", Nullable(), + Nullable(), "b", Nullable(), "a", Nullable()]), + b = NullableCategoricalArray(Nullable{String}["a", "b", Nullable(), + Nullable(), "b", "a", "a", "a"])) + updf = DataFrame(a = NullableCategoricalArray(Nullable{String}["a", "a", Nullable(), "b", Nullable()]), + b = NullableCategoricalArray(Nullable{String}["a", "b", Nullable(), "b", "a"])) @test isequal(nonunique(pdf), [false, false, false, true, false, false, true, true]) @test isequal(nonunique(updf), falses(5) ) @test isequal(updf, unique(pdf)) diff --git a/test/formula.jl b/test/formula.jl index 11c093158b..db7d5e00b9 100644 --- a/test/formula.jl +++ b/test/formula.jl @@ -53,9 +53,10 @@ module TestFormula @test t.intercept == false @test t.terms == [:x1, :x2] - t = Terms(y ~ -1 + x1 + x2) - @test t.intercept == false - @test t.terms == [:x1, :x2] + @test t == Terms(y ~ -1 + x1 + x2) == Terms(y ~ x1 - 1 + x2) == Terms(y ~ x1 + x2 -1) + + ## can't subtract terms other than 1 + @test_throws ErrorException Terms(y ~ x1 - x2) t = Terms(y ~ x1 & x2) @test t.terms == [:(x1 & x2)] @@ -133,11 +134,10 @@ module TestFormula @test isa(mm.m, Matrix{Float64}) @test isa(smm.m, sparsetype) - @test isa(ModelMatrix{DataMatrix{Float64}}(mf).m, DataMatrix{Float64}) - #test_group("expanding a PooledVec into a design matrix of indicators for each dummy variable") + #test_group("expanding a nominal array into a design matrix of indicators for each dummy variable") - d[:x1p] = PooledDataArray(d[:x1]) + d[:x1p] = NullableCategoricalArray(d[:x1]) mf = ModelFrame(y ~ x1p, d) mm = ModelMatrix(mf) 
@@ -182,24 +182,24 @@ module TestFormula ## @test r[:,1] == DataVector(df["x1"]) ## @test r[:,2] == DataVector(df["x2"]) - ## df["x1"] = PooledDataArray(x1) + ## df["x1"] = CategoricalArray(x1) ## r = expand(:x1, df) ## @test isa(r, DataFrame) ## @test ncol(r) == 3 - ## @test r == expand(PooledDataArray(x1), "x1", DataFrame()) + ## @test r == expand(CategoricalArray(x1), "x1", DataFrame()) ## r = expand(:(x1 + x2), df) ## @test isa(r, DataFrame) ## @test ncol(r) == 4 - ## @test r[:,1:3] == expand(PooledDataArray(x1), "x1", DataFrame()) + ## @test r[:,1:3] == expand(CategoricalArray(x1), "x1", DataFrame()) ## @test r[:,4] == DataVector(df["x2"]) - ## df["x2"] = PooledDataArray(x2) + ## df["x2"] = CategoricalArray(x2) ## r = expand(:(x1 + x2), df) ## @test isa(r, DataFrame) ## @test ncol(r) == 6 - ## @test r[:,1:3] == expand(PooledDataArray(x1), "x1", DataFrame()) - ## @test r[:,4:6] == expand(PooledDataArray(x2), "x2", DataFrame()) + ## @test r[:,1:3] == expand(CategoricalArray(x1), "x1", DataFrame()) + ## @test r[:,4:6] == expand(CategoricalArray(x2), "x2", DataFrame()) #test_group("Creating a model matrix using full formulas: y ~ x1 + x2, etc") @@ -216,7 +216,7 @@ module TestFormula @test mm.m == [ones(4) x1 x2 x1.*x2] @test mm.m == ModelMatrix{sparsetype}(mf).m - df[:x1] = PooledDataArray(x1) + df[:x1] = CategoricalArray(x1) x1e = [[0, 1, 0, 0] [0, 0, 1, 0] [0, 0, 0, 1]] f = y ~ x1 * x2 mf = ModelFrame(f, df) @@ -235,7 +235,7 @@ module TestFormula ## @test mm.m == [ones(4) x1 log(x2)] ## df = deepcopy(d) - ## df["x1"] = PooledDataArray([5:8]) + ## df["x1"] = CategoricalArray([5:8]) ## f = Formula(:(y ~ x1 * (log(x2) + x3))) ## mf = ModelFrame(f, df) ## mm = ModelMatrix(mf) @@ -277,7 +277,7 @@ module TestFormula ## @test model_response(mf) == y'' # fails: Int64 vs. 
Float64 df = deepcopy(d) - df[:x1] = PooledDataArray(df[:x1]) + df[:x1] = NullableCategoricalArray(df[:x1]) f = y ~ x2 + x3 + x3*x2 mm = ModelMatrix(ModelFrame(f, df)) @@ -334,9 +334,9 @@ module TestFormula ## FAILS: behavior is wrong when no lower-order terms (1+x1+x2+x1&x2...) ## ## df = DataFrame(y=1:27, - ## x1 = PooledDataArray(vec([x for x in 1:3, y in 4:6, z in 7:9])), - ## x2 = PooledDataArray(vec([y for x in 1:3, y in 4:6, z in 7:9])), - ## x3 = PooledDataArray(vec([z for x in 1:3, y in 4:6, z in 7:9]))) + ## x1 = CategoricalArray(vec([x for x in 1:3, y in 4:6, z in 7:9])), + ## x2 = CategoricalArray(vec([y for x in 1:3, y in 4:6, z in 7:9])), + ## x3 = CategoricalArray(vec([z for x in 1:3, y in 4:6, z in 7:9]))) ## f = y ~ x1 & x2 & x3 ## mf = ModelFrame(f, df) ## @test coefnames(mf)[2:end] == @@ -380,10 +380,10 @@ module TestFormula @test size(mm_sub) == (3,3) ## Missing data - d[:x1m] = @data [5, 6, NA, 7] + d[:x1m] = NullableArray(Nullable{Int}[5, 6, Nullable(), 7]) mf = ModelFrame(y ~ x1m, d) mm = ModelMatrix(mf) - @test mm.m[:, 2] == d[complete_cases(d), :x1m] + @test isequal(NullableArray(mm.m[:, 2]), d[complete_cases(d), :x1m]) @test mm.m == ModelMatrix{sparsetype}(mf).m ## Same variable on left and right side @@ -396,7 +396,7 @@ module TestFormula d = DataFrame(x = Compat.repeat([:a, :b], outer = 4), y = Compat.repeat([:c, :d], inner = 2, outer = 2), z = Compat.repeat([:e, :f], inner = 4)) -[pool!(d, name) for name in names(d)] +[categorical!(d, name) for name in names(d)] cs = Dict([Pair(name, EffectsCoding()) for name in names(d)]) d[:n] = 1.:8 @@ -545,5 +545,6 @@ df = DataFrame(x = [1.0,2.0,3.0], y = [4.0,5.0,6.0]) mf = ModelFrame(y ~ 0 + x, df) X = ModelMatrix(mf).m X[1] = 0.0 -@test mf.df[1, :x] == 1.0 +@test mf.df[1, :x] === Nullable(1.0) + end diff --git a/test/grouping.jl b/test/grouping.jl index c7dbb78dd1..c138584cec 100644 --- a/test/grouping.jl +++ b/test/grouping.jl @@ -5,8 +5,8 @@ module TestGrouping df = DataFrame(a = repeat([1, 2, 
3, 4], outer=[2]), b = repeat([2, 1], outer=[4]), c = randn(8)) - #df[6, :a] = NA - #df[7, :b] = NA + #df[6, :a] = Nullable() + #df[7, :b] = Nullable() cols = [:a, :b] @@ -15,7 +15,7 @@ module TestGrouping sdf = sort(df, cols=cols) bdf = by(df, cols, f) - @test bdf[cols] == unique(sdf[cols]) + @test isequal(bdf[cols], unique(sdf[cols])) byf = by(df, :a, df -> DataFrame(bsum = sum(df[:b]))) @@ -25,19 +25,68 @@ module TestGrouping gd = groupby(df, cols) ga = map(f, gd) - @test bdf == combine(ga) + @test isequal(bdf, combine(ga)) - g(df) = DataFrame(cmax1 = df[:cmax] + 1) + g(df) = DataFrame(cmax1 = Vector(df[:cmax]) + 1) h(df) = g(f(df)) - @test combine(map(h, gd)) == combine(map(g, ga)) + @test isequal(combine(map(h, gd)), combine(map(g, ga))) + + # testing pool overflow + df2 = DataFrame(v1 = categorical(collect(1:1000)), v2 = categorical(fill(1, 1000))) + @test groupby(df2, [:v1, :v2]).starts == collect(1:1000) + @test groupby(df2, [:v2, :v1]).starts == collect(1:1000) + + # grouping empty frame + @test groupby(DataFrame(A=Int[]), :A).starts == Int[] + # grouping single row + @test groupby(DataFrame(A=Int[1]), :A).starts == Int[1] # issue #960 - x = pool(collect(1:20)) + x = CategoricalArray(collect(1:20)) df = DataFrame(v1=x, v2=x) groupby(df, [:v1, :v2]) df2 = by(e->1, DataFrame(x=Int64[]), :x) @test size(df2) == (0,1) - @test sum(df2[:x]) == 0 + @test isequal(sum(df2[:x]), Nullable(0)) + + # Check that reordering levels does not confuse groupby + df = DataFrame(Key1 = CategoricalArray(["A", "A", "B", "B"]), + Key2 = CategoricalArray(["A", "B", "A", "B"]), + Value = 1:4) + gd = groupby(df, :Key1) + @test isequal(gd[1], DataFrame(Key1=["A", "A"], Key2=["A", "B"], Value=1:2)) + @test isequal(gd[2], DataFrame(Key1=["B", "B"], Key2=["A", "B"], Value=3:4)) + gd = groupby(df, [:Key1, :Key2]) + @test isequal(gd[1], DataFrame(Key1="A", Key2="A", Value=1)) + @test isequal(gd[2], DataFrame(Key1="A", Key2="B", Value=2)) + @test isequal(gd[3], DataFrame(Key1="B", Key2="A", 
Value=3)) + @test isequal(gd[4], DataFrame(Key1="B", Key2="B", Value=4)) + # Reorder levels, add unused level + levels!(df[:Key1], ["Z", "B", "A"]) + levels!(df[:Key2], ["Z", "B", "A"]) + gd = groupby(df, :Key1) + @test isequal(gd[1], DataFrame(Key1=["B", "B"], Key2=["A", "B"], Value=3:4)) + @test isequal(gd[2], DataFrame(Key1=["A", "A"], Key2=["A", "B"], Value=1:2)) + gd = groupby(df, [:Key1, :Key2]) + @test isequal(gd[1], DataFrame(Key1="B", Key2="B", Value=4)) + @test isequal(gd[2], DataFrame(Key1="B", Key2="A", Value=3)) + @test isequal(gd[3], DataFrame(Key1="A", Key2="B", Value=2)) + @test isequal(gd[4], DataFrame(Key1="A", Key2="A", Value=1)) + + a = DataFrame(x=categorical(1:200)) + b = DataFrame(x=categorical(100:300)) + a[:x] = compact(a[:x]) + b[:x] = compact(b[:x]) + r = vcat(a, b) + @test isequal(r, DataFrame(x=[categorical(1:200); categorical(100:300)])) + + a = DataFrame(x=categorical(1:200)) + b = DataFrame(y=categorical(100:300)) + a[:x] = compact(a[:x]) + b[:y] = compact(b[:y]) + r = vcat(a, b) + @test isequal(r, DataFrame(x=NullableCategoricalArray(1:401, [fill(false, 200); fill(true, 201)]), + y=NullableCategoricalArray(-100:300, [fill(true, 200); fill(false, 201)]))) end diff --git a/test/index.jl b/test/index.jl index 9160da4249..41fc3f1495 100644 --- a/test/index.jl +++ b/test/index.jl @@ -16,16 +16,11 @@ inds = Any[1, 1:1, 1.0:1.0, [:A], - @data([true]), - @data([1]), - @data([1.0]), - @data([:A]), - DataArray([:A]), - PooledDataArray([true]), - @pdata([1]), - @pdata([1.0]), - @pdata([:A]), - PooledDataArray([:A])] + NullableArray([true]), + NullableArray([1]), + NullableArray([1.0]), + NullableArray([:A]), + NullableArray([:A])] for ind in inds if isequal(ind, :A) || ndims(ind) == 0 @@ -56,4 +51,12 @@ for name in names(i) i2[name] # Issue #715 end +#= Aliasing & Mutation =# + +# columns should not alias if scalar broadcasted +df = DataFrame(A=[0],B=[0]) +df[1:end] = 0.0 +df[1,:A] = 1.0 +@test df[1,:B] === Nullable(0) + end diff --git 
a/test/io.jl b/test/io.jl index c587a57b59..715de39e21 100644 --- a/test/io.jl +++ b/test/io.jl @@ -1,6 +1,7 @@ module TestIO using Base.Test using DataFrames, Compat + using LaTeXStrings #test_group("We can read various file types.") @@ -38,57 +39,57 @@ module TestIO @test size(df) == (58788, 25) - @test df[1, 1] === 1 - @test df[1, 2] == "\$" - @test df[1, 3] === 1971 - @test df[1, 4] === 121 - @test df[1, 5] === NA - @test df[1, 6] === 6.4 - @test df[1, 7] === 348 - @test df[1, 8] === 4.5 - @test df[1, 9] === 4.5 - @test df[1, 10] === 4.5 - @test df[1, 11] === 4.5 - @test df[1, 12] === 14.5 - @test df[1, 13] === 24.5 - @test df[1, 14] === 24.5 - @test df[1, 15] === 14.5 - @test df[1, 16] === 4.5 - @test df[1, 17] === 4.5 - @test df[1, 18] == "" - @test df[1, 19] === 0 - @test df[1, 20] === 0 - @test df[1, 21] === 1 - @test df[1, 22] === 1 - @test df[1, 23] === 0 - @test df[1, 24] === 0 - @test df[1, 25] === 0 - - @test df[end, 1] === 58788 - @test df[end, 2] == "xXx: State of the Union" - @test df[end, 3] === 2005 - @test df[end, 4] === 101 - @test df[end, 5] === 87000000 - @test df[end, 6] === 3.9 - @test df[end, 7] === 1584 - @test df[end, 8] === 24.5 - @test df[end, 9] === 4.5 - @test df[end, 10] === 4.5 - @test df[end, 11] === 4.5 - @test df[end, 12] === 4.5 - @test df[end, 13] === 14.5 - @test df[end, 14] === 4.5 - @test df[end, 15] === 4.5 - @test df[end, 16] === 4.5 - @test df[end, 17] === 14.5 - @test df[end, 18] == "PG-13" - @test df[end, 19] === 1 - @test df[end, 20] === 0 - @test df[end, 21] === 0 - @test df[end, 22] === 0 - @test df[end, 23] === 0 - @test df[end, 24] === 0 - @test df[end, 25] === 0 + @test df[1, 1] === Nullable(1) + @test isequal(df[1, 2], Nullable("\$")) + @test df[1, 3] === Nullable(1971) + @test df[1, 4] === Nullable(121) + @test isnull(df[1, 5]) + @test df[1, 6] === Nullable(6.4) + @test df[1, 7] === Nullable(348) + @test df[1, 8] === Nullable(4.5) + @test df[1, 9] === Nullable(4.5) + @test df[1, 10] === Nullable(4.5) + @test 
df[1, 11] === Nullable(4.5) + @test df[1, 12] === Nullable(14.5) + @test df[1, 13] === Nullable(24.5) + @test df[1, 14] === Nullable(24.5) + @test df[1, 15] === Nullable(14.5) + @test df[1, 16] === Nullable(4.5) + @test df[1, 17] === Nullable(4.5) + @test isequal(df[1, 18], Nullable("")) + @test df[1, 19] === Nullable(0) + @test df[1, 20] === Nullable(0) + @test df[1, 21] === Nullable(1) + @test df[1, 22] === Nullable(1) + @test df[1, 23] === Nullable(0) + @test df[1, 24] === Nullable(0) + @test df[1, 25] === Nullable(0) + + @test df[end, 1] === Nullable(58788) + @test isequal(df[end, 2], Nullable("xXx: State of the Union")) + @test df[end, 3] === Nullable(2005) + @test df[end, 4] === Nullable(101) + @test df[end, 5] === Nullable(87000000) + @test df[end, 6] === Nullable(3.9) + @test df[end, 7] === Nullable(1584) + @test df[end, 8] === Nullable(24.5) + @test df[end, 9] === Nullable(4.5) + @test df[end, 10] === Nullable(4.5) + @test df[end, 11] === Nullable(4.5) + @test df[end, 12] === Nullable(4.5) + @test df[end, 13] === Nullable(14.5) + @test df[end, 14] === Nullable(4.5) + @test df[end, 15] === Nullable(4.5) + @test df[end, 16] === Nullable(4.5) + @test df[end, 17] === Nullable(14.5) + @test isequal(df[end, 18], Nullable("PG-13")) + @test df[end, 19] === Nullable(1) + @test df[end, 20] === Nullable(0) + @test df[end, 21] === Nullable(0) + @test df[end, 22] === Nullable(0) + @test df[end, 23] === Nullable(0) + @test df[end, 24] === Nullable(0) + @test df[end, 25] === Nullable(0) #test_group("readtable handles common separators and infers them from extensions.") @@ -97,9 +98,9 @@ module TestIO df3 = readtable("$data/separators/sample_data.wsv") df4 = readtable("$data/separators/sample_data_white.txt", separator = ' ') - @test df1 == df2 - @test df2 == df3 - @test df3 == df4 + @test isequal(df1, df2) + @test isequal(df2, df3) + @test isequal(df3, df4) readtable("$data/quoting/quotedwhitespace.txt", separator = ' ') @@ -129,23 +130,25 @@ module TestIO # df10 = 
readtable("$data/skiplines/skipfront.csv", skipstart = 3, header = false, skiprows = [4, 6]) # names!(df10, names(df1)) - @test df2 == df1 - @test df3 == df1 - @test df4 == df1 + @test isequal(df2, df1) + @test isequal(df3, df1) + @test isequal(df4, df1) # Windows EOLS - @test df5 == df1 - @test df6 == df1 - @test df7 == df1 - @test df8 == df1 + @test isequal(df5, df1) + @test isequal(df6, df1) + @test isequal(df7, df1) + @test isequal(df8, df1) - # @test df9 == df1[3:end] - # @test df10 == df1[[1, 3:end]] + # @test isequal(df9, df1[3:end]) + # @test isequal(df10, df1[[1, 3:end]]) function normalize_eol!(df) for (name, col) in eachcol(df) if eltype(col) <: AbstractString df[name] = map(s -> replace(s, "\r\n", "\n"), col) + elseif eltype(col) <: Nullable && eltype(eltype(col)) <: AbstractString + df[name] = map(s -> replace(get(s), "\r\n", "\n"), col) end end df @@ -163,16 +166,16 @@ module TestIO # df2w = readtable(winpath; opts2...) # Normalize line endings in both and test equality - @test normalize_eol!(df1w) == normalize_eol!(df1) + @test isequal(normalize_eol!(df1w), normalize_eol!(df1)) # @test normalize_eol!(df2w) == df1 opts1[:nrows] = 3 opts2[:nrows] = 3 - @test normalize_eol!(readtable(osxpath; opts1...)) == df1[1:3, :] - # @test readtable(osxpath; opts2...) == df1[1:3, :] - @test normalize_eol!(readtable(winpath; opts1...)) == df1[1:3, :] - # @test readtable(winpath; opts2...) 
== df1[1:3, :] + @test isequal(normalize_eol!(readtable(osxpath; opts1...)), df1[1:3, :]) + # @test isequalreadtable(osxpath; opts2...), df1[1:3, :] + @test isequal(normalize_eol!(readtable(winpath; opts1...)), df1[1:3, :]) + # @test isequalreadtable(winpath; opts2...), df1[1:3, :]) #test_group("readtable handles custom delimiters.") @@ -181,55 +184,55 @@ module TestIO readtable("$data/separators/sample_data.csv", quotemark = Char[]) @test_throws ErrorException readtable("$data/newlines/embedded_osx.csv", quotemark = Char[]) df = readtable("$data/quoting/single.csv", quotemark = ['\'']) - @test df == readtable("$data/quoting/mixed.csv", quotemark = ['\'', '"']) + @test isequal(df, readtable("$data/quoting/mixed.csv", quotemark = ['\'', '"'])) # df = readtable("$data/decimal/period.csv") - # @test df[2, :A] == 0.3 - # @test df[2, :B] == 4.0 + # @test isequaldf[2, :A], 0.3) + # @test isequaldf[2, :B], 4.0) - # @test df == readtable("$data/decimal/comma.tsv", decimal = ',') + # @test isequal(df, readtable("$data/decimal/comma.tsv", decimal = ',')) #test_group("readtable column names.") ns = [:Var1, :Var2, :Var3, :Var4, :Var5] df = readtable("$data/typeinference/mixedtypes.csv") names!(df, ns) - @test df == readtable("$data/typeinference/mixedtypes.csv", names = ns) + @test isequal(df, readtable("$data/typeinference/mixedtypes.csv", names = ns)) df = readtable("$data/separators/sample_data.csv", header = false, names = ns[1:3]) - @test df[1, :Var1] == 0 + @test isequal(df[1, :Var1], Nullable(0)) df = readtable("$data/separators/sample_data.csv", names = ns[1:3]) - @test df[1, :Var1] == 1 + @test isequal(df[1, :Var1], Nullable(1)) #test_group("Properties of data frames returned by readtable method.") # Readtable ignorepadding io = IOBuffer("A , \tB , C\n1 , \t2, 3\n") - @test readtable(io, ignorepadding = true) == DataFrame(A = 1, B = 2, C = 3) + @test isequal(readtable(io, ignorepadding = true), DataFrame(A = 1, B = 2, C = 3)) # Readtable c-style escape options df = 
readtable("$data/escapes/escapes.csv", allowescapes = true) - @test df[1, :V] == "\t\r\n" - @test df[2, :V] == "\\\\t" - @test df[3, :V] == "\\\"" + @test isequal(df[1, :V], Nullable("\t\r\n")) + @test isequal(df[2, :V], Nullable("\\\\t")) + @test isequal(df[3, :V], Nullable("\\\"")) df = readtable("$data/escapes/escapes.csv") - @test df[1, :V] == "\\t\\r\\n" - @test df[2, :V] == "\\\\t" - @test df[3, :V] == "\\\"" + @test isequal(df[1, :V], Nullable("\\t\\r\\n")) + @test isequal(df[2, :V], Nullable("\\\\t")) + @test isequal(df[3, :V], Nullable("\\\"")) # df = readtable("$data/escapes/escapes.csv", escapechars = ['"'], nrows = 2) - # @test df[1, :V] == "\\t\\r\\n" - # @test df[2, :V] == "\\\\\\\\t" + # @test isequal(df[1, :V], "\\t\\r\\n") + # @test isequal(df[2, :V], "\\\\\\\\t") # Readtable with makefactors active should only make factors from columns # of strings. filename = "$data/factors/mixedvartypes.csv" df = readtable(filename, makefactors = true) - @test typeof(df[:factorvar]) == PooledDataArray{Compat.UTF8String,UInt32,1} - @test typeof(df[:floatvar]) == DataArray{Float64,1} + @test isa(df[:factorvar], NullableCategoricalArray{Compat.UTF8String,1}) + @test isa(df[:floatvar], NullableArray{Float64,1}) # Readtable shouldn't silently drop data when reading highly compressed gz. 
df = readtable("$data/compressed/1000x2.csv.gz") @@ -238,79 +241,79 @@ module TestIO # Readtable type inference filename = "$data/typeinference/bool.csv" df = readtable(filename) - @test typeof(df[:Name]) == DataArray{Compat.UTF8String,1} - @test typeof(df[:IsMale]) == DataArray{Bool,1} - @test df[:IsMale][1] == true - @test df[:IsMale][4] == false + @test isa(df[:Name], NullableArray{Compat.UTF8String,1}) + @test isa(df[:IsMale], NullableArray{Bool,1}) + @test get(df[:IsMale][1]) + @test !get(df[:IsMale][4]) filename = "$data/typeinference/standardtypes.csv" df = readtable(filename) - @test typeof(df[:IntColumn]) == DataArray{Int,1} - @test typeof(df[:IntlikeColumn]) == DataArray{Float64,1} - @test typeof(df[:FloatColumn]) == DataArray{Float64,1} - @test typeof(df[:BoolColumn]) == DataArray{Bool,1} - @test typeof(df[:StringColumn]) == DataArray{Compat.UTF8String,1} + @test isa(df[:IntColumn], NullableArray{Int,1}) + @test isa(df[:IntlikeColumn], NullableArray{Float64,1}) + @test isa(df[:FloatColumn], NullableArray{Float64,1}) + @test isa(df[:BoolColumn], NullableArray{Bool,1}) + @test isa(df[:StringColumn], NullableArray{Compat.UTF8String,1}) filename = "$data/typeinference/mixedtypes.csv" df = readtable(filename) - @test typeof(df[:c1]) == DataArray{Compat.UTF8String,1} - @test df[:c1][1] == "1" - @test df[:c1][2] == "2.0" - @test df[:c1][3] == "true" - @test typeof(df[:c2]) == DataArray{Float64,1} - @test df[:c2][1] == 1.0 - @test df[:c2][2] == 3.0 - @test df[:c2][3] == 4.5 - @test typeof(df[:c3]) == DataArray{Compat.UTF8String,1} - @test df[:c3][1] == "0" - @test df[:c3][2] == "1" - @test df[:c3][3] == "f" - @test typeof(df[:c4]) == DataArray{Bool,1} - @test df[:c4][1] == true - @test df[:c4][2] == false - @test df[:c4][3] == true - @test typeof(df[:c5]) == DataArray{Compat.UTF8String,1} - @test df[:c5][1] == "False" - @test df[:c5][2] == "true" - @test df[:c5][3] == "true" + @test isa(df[:c1], NullableArray{Compat.UTF8String,1}) + @test isequal(df[:c1][1], 
Nullable("1")) + @test isequal(df[:c1][2], Nullable("2.0")) + @test isequal(df[:c1][3], Nullable("true")) + @test isa(df[:c2], NullableArray{Float64,1}) + @test isequal(df[:c2][1], Nullable(1.0)) + @test isequal(df[:c2][2], Nullable(3.0)) + @test isequal(df[:c2][3], Nullable(4.5)) + @test isa(df[:c3], NullableArray{Compat.UTF8String,1}) + @test isequal(df[:c3][1], Nullable("0")) + @test isequal(df[:c3][2], Nullable("1")) + @test isequal(df[:c3][3], Nullable("f")) + @test isa(df[:c4], NullableArray{Bool,1}) + @test isequal(df[:c4][1], Nullable(true)) + @test isequal(df[:c4][2], Nullable(false)) + @test isequal(df[:c4][3], Nullable(true)) + @test isa(df[:c5], NullableArray{Compat.UTF8String,1}) + @test isequal(df[:c5][1], Nullable("False")) + @test isequal(df[:c5][2], Nullable("true")) + @test isequal(df[:c5][3], Nullable("true")) # Readtable defining column types filename = "$data/definedtypes/mixedvartypes.csv" df = readtable(filename) - @test typeof(df[:n]) == DataArray{Int,1} - @test df[:n][1] == 1 - @test typeof(df[:s]) == DataArray{Compat.UTF8String,1} - @test df[:s][1] == "text" - @test typeof(df[:f]) == DataArray{Float64,1} - @test df[:f][1] == 2.3 - @test typeof(df[:b]) == DataArray{Bool,1} - @test df[:b][1] == true + @test isa(df[:n], NullableArray{Int,1}) + @test isequal(df[:n][1], Nullable(1)) + @test isa(df[:s], NullableArray{Compat.UTF8String,1}) + @test isequal(df[:s][1], Nullable("text")) + @test isa(df[:f], NullableArray{Float64,1}) + @test isequal(df[:f][1], Nullable(2.3)) + @test isa(df[:b], NullableArray{Bool,1}) + @test isequal(df[:b][1], Nullable(true)) df = readtable(filename, eltypes = [Int64, Compat.UTF8String, Float64, Bool]) - @test typeof(df[:n]) == DataArray{Int64,1} - @test df[:n][1] == 1 - @test typeof(df[:s]) == DataArray{Compat.UTF8String,1} - @test df[:s][1] == "text" - @test df[:s][4] == "text ole" - @test typeof(df[:f]) == DataArray{Float64,1} - @test df[:f][1] == 2.3 - @test typeof(df[:b]) == DataArray{Bool,1} - @test df[:b][1] == 
true - @test df[:b][2] == false + @test isa(df[:n], NullableArray{Int64,1}) + @test isequal(df[:n][1], Nullable(1)) + @test isa(df[:s], NullableArray{Compat.UTF8String,1}) + @test isequal(df[:s][1], Nullable("text")) + @test isequal(df[:s][4], Nullable("text ole")) + @test isa(df[:f], NullableArray{Float64,1}) + @test isequal(df[:f][1], Nullable(2.3)) + @test isa(df[:b], NullableArray{Bool,1}) + @test isequal(df[:b][1], Nullable(true)) + @test isequal(df[:b][2], Nullable(false)) df = readtable(filename, eltypes = [Int64, Compat.UTF8String, Float64, Compat.UTF8String]) - @test typeof(df[:n]) == DataArray{Int64,1} - @test df[:n][1] == 1.0 - @test isna(df[:s][3]) - @test typeof(df[:f]) == DataArray{Float64,1} + @test isa(df[:n], NullableArray{Int64,1}) + @test isequal(df[:n][1], Nullable(1.0)) + @test isnull(df[:s][3]) + @test isa(df[:f], NullableArray{Float64,1}) # Float are not converted to int - @test df[:f][1] == 2.3 - @test df[:f][2] == 0.2 - @test df[:f][3] == 5.7 - @test typeof(df[:b]) == DataArray{Compat.UTF8String,1} - @test df[:b][1] == "T" - @test df[:b][2] == "FALSE" + @test isequal(df[:f][1], Nullable(2.3)) + @test isequal(df[:f][2], Nullable(0.2)) + @test isequal(df[:f][3], Nullable(5.7)) + @test isa(df[:b], NullableArray{Compat.UTF8String,1}) + @test isequal(df[:b][1], Nullable("T")) + @test isequal(df[:b][2], Nullable("FALSE")) # Readtable name normalization abnormal = "\u212b" @@ -324,12 +327,13 @@ module TestIO io = IOBuffer(abnormal*",%_B*\tC*,end\n1,2,3\n") @test names(readtable(io, normalizenames=false)) == [Symbol(abnormal),Symbol("%_B*\tC*"),:end] - # Test writetable with NA and compare to the results + # Test writetable with Nullable() and compare to the results tf = tempname() isfile(tf) && rm(tf) - df = DataFrame(A = @data([1,NA]), B = @data(["b", NA])) + df = DataFrame(A = NullableArray(Nullable{Int}[1,Nullable()]), + B = NullableArray(Nullable{String}["b", Nullable()])) writetable(tf, df) - @test readcsv(tf) == ["A" "B"; 1 "b"; "NA" "NA"] + 
@test readcsv(tf) == ["A" "B"; 1 "b"; "NULL" "NULL"] # Test writetable with nastring set and compare to the results isfile(tf) && rm(tf) @@ -338,10 +342,10 @@ module TestIO rm(tf) # Test writetable with append - df1 = DataFrame(a = @data([1, 2, 3]), b = @data([4, 5, 6])) - df2 = DataFrame(a = @data([1, 2, 3]), b = @data([4, 5, 6])) - df3 = DataFrame(a = @data([1, 2, 3]), c = @data([4, 5, 6])) # 2nd column mismatch - df3b = DataFrame(a = @data([1, 2, 3]), b = @data([4, 5, 6]), c = @data([4, 5, 6])) # number of columns mismatch + df1 = DataFrame(a = NullableArray([1, 2, 3]), b = NullableArray([4, 5, 6])) + df2 = DataFrame(a = NullableArray([1, 2, 3]), b = NullableArray([4, 5, 6])) + df3 = DataFrame(a = NullableArray([1, 2, 3]), c = NullableArray([4, 5, 6])) # 2nd column mismatch + df3b = DataFrame(a = NullableArray([1, 2, 3]), b = NullableArray([4, 5, 6]), c = NullableArray([4, 5, 6])) # number of columns mismatch # Would use joinpath(tempdir(), randstring()) to get around tempname @@ -352,22 +356,22 @@ module TestIO # Written as normal if file doesn't exist writetable(tf, df1, append = true) - @test readtable(tf) == df1 + @test isequal(readtable(tf), df1) # Written as normal if file is empty open(io -> print(io, ""), tf, "w") writetable(tf, df1, append = true) - @test readtable(tf) == df1 + @test isequal(readtable(tf), df1) # Appends to existing file if append == true writetable(tf, df1) writetable(tf, df2, header = false, append = true) - @test readtable(tf) == vcat(df1, df2) + @test isequal(readtable(tf), vcat(df1, df2)) # Overwrites file if append == false writetable(tf, df1) writetable(tf, df2) - @test readtable(tf) == df2 + @test isequal(readtable(tf), df2) # Enforces matching column names iff append == true && header == true writetable(tf, df2) @@ -387,7 +391,7 @@ module TestIO # Make sure the ' doesn't get escaped for no reason writetable(tf, df) - @test readtable(tf) == df + @test isequal(readtable(tf), df) # Make sure the ' does get escaped when needed 
writetable(tf, df, quotemark='\'') @@ -405,10 +409,10 @@ module TestIO """ @test size(df1) == (4, 3) @test names(df1) == [:name, :age, :squidPerWeek] - @test df1[1] == ["Alice","Bob","Carol","Eve"] - @test df1[2] == [36,24,58,49] - @test df1[3] == [3.14,0,2.71,7.77] - @test typeof(df1[1]) <: DataArray + @test isequal(df1[1], NullableArray(["Alice","Bob","Carol","Eve"])) + @test isequal(df1[2], NullableArray([36,24,58,49])) + @test isequal(df1[3], NullableArray([3.14,0,2.71,7.77])) + @test isa(df1[1], NullableArray{Compat.UTF8String,1}) # Test @wsv_str df2 = wsv""" @@ -418,7 +422,7 @@ module TestIO Carol 58 2.71 Eve 49 7.77 """ - @test df2 == df1 + @test isequal(df2, df1) # Test @tsv_str df3 = tsv""" @@ -428,7 +432,7 @@ module TestIO Carol 58 2.71 Eve 49 7.77 """ - @test df3 == df1 + @test isequal(df3, df1) # csv2 can't be tested until non-'.' decimals are implemented #df4 = csv2""" @@ -438,7 +442,7 @@ module TestIO # Carol; 58; 2,71 # Eve; 49; 7,77 # """ - #@test df4 == df1 + #@test isequal(df4, df1) # Test 'f' flag df5 = csv""" @@ -448,7 +452,7 @@ module TestIO Carol, 58, 2.71 Eve, 49, 7.77 """f - @test typeof(df5[1]) <: PooledDataArray + @test isa(df5[1], NullableCategoricalArray{Compat.UTF8String,1}) # Test 'c' flag df6 = csv""" @@ -458,7 +462,7 @@ module TestIO #Carol, 58, 2.71 Eve, 49, 7.77 """c - @test df6 == df1[[1,2,4],:] + @test isequal(df6, df1[[1,2,4],:]) # Test 'H' flag df7 = csv""" @@ -468,7 +472,8 @@ module TestIO Eve, 49, 7.77 """H @test names(df7) == [:x1,:x2,:x3] - @test Array(df7) == Array(df1) + names!(df7, names(df1)) + @test isequal(df7, df1) # Test multiple flags at once df8 = csv""" @@ -477,12 +482,49 @@ module TestIO #Carol, 58, 2.71 Eve, 49, 7.77 """fcH - @test typeof(df8[1]) <: PooledDataArray + @test isa(df8[1], NullableCategoricalArray{Compat.UTF8String,1}) @test names(df8) == [:x1,:x2,:x3] - @test Array(df8) == Array(df1[[1,2,4],:]) + names!(df8, names(df1)) + @test isequal(df8, df1[[1,2,4],:]) # Test invalid flag # Need to wrap macro 
call inside eval to prevent the error from being # thrown prematurely @test_throws ArgumentError eval(:(csv"foo,bar"a)) + + # Test LaTeX export + df = DataFrame(A = 1:4, + B = ["\$10.0", "M&F", "A~B", "\\alpha"], + C = [L"\alpha", L"\beta", L"\gamma", L"\sum_{i=1}^n \delta_i"], + D = [1.0, 2.0, Nullable(), 3.0] + ) + str = """ + \\begin{tabular}{r|cccc} + \t& A & B & C & D\\\\ + \t\\hline + \t1 & 1 & \\\$10.0 & \$\\alpha\$ & 1.0 \\\\ + \t2 & 2 & M\\&F & \$\\beta\$ & 2.0 \\\\ + \t3 & 3 & A\\textasciitilde{}B & \$\\gamma\$ & \\\\ + \t4 & 4 & \\textbackslash{}alpha & \$\\sum_{i=1}^n \\delta_i\$ & 3.0 \\\\ + \\end{tabular} + """ + @test reprmime(MIME("text/latex"), df) == str + + #Test HTML output for IJulia and similar + df = DataFrame(Fish = ["Suzy", "Amir"], Mass = [1.5, Nullable()]) + io = IOBuffer() + show(io, "text/html", df) + str = takebuf_string(io) + @test str == "
<table class=\"data-frame\"><tr><th></th><th>Fish</th><th>Mass</th></tr><tr><th>1</th><td>Suzy</td><td>1.5</td></tr><tr><th>2</th><td>Amir</td><td>#NULL</td></tr></table>
" + + # test limit attribute of IOContext is used + df = DataFrame(a=collect(1:1000)) + ioc = IOContext(IOBuffer(), displaysize=(10, 10), limit=false) + show(ioc, "text/html", df) + @test length(takebuf_string(ioc.io)) > 10000 + + io = IOBuffer() + show(io, "text/html", df) + @test length(takebuf_string(io)) < 10000 + end diff --git a/test/iteration.jl b/test/iteration.jl index 5c712298cf..57c17becd4 100644 --- a/test/iteration.jl +++ b/test/iteration.jl @@ -1,34 +1,22 @@ module TestIteration using Base.Test, DataFrames, Compat - dv = @data([1, 2, NA]) - dm = DataArray([1 2; 3 4]) - dt = DataArray(zeros(2, 2, 2)) + dv = NullableArray(Nullable{Int}[1, 2, Nullable()]) + dm = NullableArray([1 2; 3 4]) + dt = NullableArray(zeros(2, 2, 2)) df = DataFrame(A = 1:2, B = 2:3) - for el in dv - @test ndims(el) == 0 - end - - for el in dm - @test ndims(el) == 0 - end - - for el in dt - @test ndims(el) == 0 - end - for row in eachrow(df) @test isa(row, DataFrameRow) - @test row[:B]-row[:A] == 1 + @test isequal(row[:B]-row[:A], Nullable(1)) # issue #683 (https://github.com/JuliaStats/DataFrames.jl/pull/683) @test typeof(collect(row)) == @compat Array{Tuple{Symbol, Any}, 1} end for col in eachcol(df) - @test isa(col, @compat Tuple{Symbol, AbstractDataVector}) + @test isa(col, @compat Tuple{Symbol, NullableVector}) end @test isequal(map(x -> minimum(convert(Array, x)), eachrow(df)), Any[1,2]) @@ -37,22 +25,22 @@ module TestIteration row = DataFrameRow(df, 1) row[:A] = 100 - @test df[1, :A] == 100 + @test isequal(df[1, :A], Nullable(100)) row[1] = 101 - @test df[1, :A] == 101 + @test isequal(df[1, :A], Nullable(101)) df = DataFrame(A = 1:4, B = ["M", "F", "F", "M"]) s1 = sub(df, 1:3) s1[2,:A] = 4 - @test df[2, :A] == 4 - @test sub(s1, 1:2) == sub(df, 1:2) + @test isequal(df[2, :A], Nullable(4)) + @test isequal(sub(s1, 1:2), sub(df, 1:2)) s2 = sub(df, 1:2:3) s2[2, :B] = "M" - @test df[3, :B] == "M" - @test sub(s2, 1:1:2) == sub(df, [1,3]) + @test isequal(df[3, :B], Nullable("M")) + 
@test isequal(sub(s2, 1:1:2), sub(df, [1,3])) # @test_fail for x in df; end # Raises an error end diff --git a/test/join.jl b/test/join.jl index b612eecc02..5be59c7915 100644 --- a/test/join.jl +++ b/test/join.jl @@ -14,15 +14,15 @@ module TestJoin # Test output of various join types outer = DataFrame(ID = [1, 2, 2, 3, 4], - Name = @data(["John Doe", "Jane Doe", "Jane Doe", "Joe Blogs", NA]), - Job = @data(["Lawyer", "Doctor", "Florist", NA, "Farmer"])) + Name = NullableArray(Nullable{String}["John Doe", "Jane Doe", "Jane Doe", "Joe Blogs", Nullable()]), + Job = NullableArray(Nullable{String}["Lawyer", "Doctor", "Florist", Nullable(), "Farmer"])) # (Tests use current column ordering but don't promote it) - right = outer[!isna(outer[:Job]), [:Name, :ID, :Job]] - left = outer[!isna(outer[:Name]), :] - inner = left[!isna(left[:Job]), :] + right = outer[Bool[!isnull(x) for x in outer[:Job]], [:Name, :ID, :Job]] + left = outer[Bool[!isnull(x) for x in outer[:Name]], :] + inner = left[Bool[!isnull(x) for x in left[:Job]], :] semi = unique(inner[:, [:ID, :Name]]) - anti = left[isna(left[:Job]), [:ID, :Name]] + anti = left[Bool[isnull(x) for x in left[:Job]], [:ID, :Name]] @test isequal(join(name, job, on = :ID), inner) @test isequal(join(name, job, on = :ID, kind = :inner), inner) @@ -59,7 +59,7 @@ module TestJoin B = ['a', 'a', 'a', 'b', 'b', 'b'], C = [3, 4, 5, 3, 4, 5]) - @test join(df1, df2[[:C]], kind = :cross) == cross + @test isequal(join(df1, df2[[:C]], kind = :cross), cross) # Cross joins handle naming collisions @test size(join(df1, df1, kind = :cross)) == (4, 4) @@ -67,11 +67,44 @@ module TestJoin # Cross joins don't take keys @test_throws ArgumentError join(df1, df2, on = :A, kind = :cross) + # test empty inputs + simple_df(len::Int, col=:A) = (df = DataFrame(); df[col]=collect(1:len); df) + @test isequal(join(simple_df(0), simple_df(0), on = :A, kind = :left), simple_df(0)) + @test isequal(join(simple_df(2), simple_df(0), on = :A, kind = :left), simple_df(2)) 
+ @test isequal(join(simple_df(0), simple_df(2), on = :A, kind = :left), simple_df(0)) + @test isequal(join(simple_df(0), simple_df(0), on = :A, kind = :right), simple_df(0)) + @test isequal(join(simple_df(0), simple_df(2), on = :A, kind = :right), simple_df(2)) + @test isequal(join(simple_df(2), simple_df(0), on = :A, kind = :right), simple_df(0)) + @test isequal(join(simple_df(0), simple_df(0), on = :A, kind = :inner), simple_df(0)) + @test isequal(join(simple_df(0), simple_df(2), on = :A, kind = :inner), simple_df(0)) + @test isequal(join(simple_df(2), simple_df(0), on = :A, kind = :inner), simple_df(0)) + @test isequal(join(simple_df(0), simple_df(0), on = :A, kind = :outer), simple_df(0)) + @test isequal(join(simple_df(0), simple_df(2), on = :A, kind = :outer), simple_df(2)) + @test isequal(join(simple_df(2), simple_df(0), on = :A, kind = :outer), simple_df(2)) + @test isequal(join(simple_df(0), simple_df(0), on = :A, kind = :semi), simple_df(0)) + @test isequal(join(simple_df(2), simple_df(0), on = :A, kind = :semi), simple_df(0)) + @test isequal(join(simple_df(0), simple_df(2), on = :A, kind = :semi), simple_df(0)) + @test isequal(join(simple_df(0), simple_df(0), on = :A, kind = :anti), simple_df(0)) + @test isequal(join(simple_df(2), simple_df(0), on = :A, kind = :anti), simple_df(2)) + @test isequal(join(simple_df(0), simple_df(2), on = :A, kind = :anti), simple_df(0)) + @test isequal(join(simple_df(0), simple_df(0, :B), kind = :cross), DataFrame(A=Int[], B=Int[])) + @test isequal(join(simple_df(0), simple_df(2, :B), kind = :cross), DataFrame(A=Int[], B=Int[])) + @test isequal(join(simple_df(2), simple_df(0, :B), kind = :cross), DataFrame(A=Int[], B=Int[])) + # issue #960 df1 = DataFrame(A = 1:50, B = 1:50, C = 1) - pool!(df1, :A) - pool!(df1, :B) + categorical!(df1, :A) + categorical!(df1, :B) join(df1, df1, on = [:A, :B], kind = :inner) + + # Test that Array{Nullable} works when combined with NullableArray (#1088) + df = DataFrame(Name = 
Nullable{String}["A", "B", "C"], + Mass = [1.5, 2.2, 1.1]) + df2 = DataFrame(Name = ["A", "B", "C", "A"], + Quantity = [3, 3, 2, 4]) + @test join(df2, df, on=:Name, kind=:left) == DataFrame(Name = ["A", "A", "B", "C"], + Quantity = [3, 4, 3, 2], + Mass = [1.5, 1.5, 2.2, 1.1]) end diff --git a/test/show.jl b/test/show.jl index d5afb45840..ef492ba8d3 100644 --- a/test/show.jl +++ b/test/show.jl @@ -1,6 +1,7 @@ module TestShow using DataFrames using Compat + using Base.Test import Compat.String df = DataFrame(A = 1:3, B = ["x", "y", "z"]) @@ -35,4 +36,16 @@ module TestShow show(io, A) A = DataFrames.RepeatedVector([1, 2, 3], 1, 5) show(io, A) + + #Test show output for REPL and similar + df = DataFrame(Fish = ["Suzy", "Amir"], Mass = [1.5, Nullable()]) + io = IOBuffer() + show(io, df) + str = takebuf_string(io) + @test str == """ +2×2 DataFrames.DataFrame +│ Row │ Fish │ Mass │ +├─────┼──────┼───────┤ +│ 1 │ Suzy │ 1.5 │ +│ 2 │ Amir │ #NULL │""" end diff --git a/test/sort.jl b/test/sort.jl index cf48fab744..7760a3dd81 100644 --- a/test/sort.jl +++ b/test/sort.jl @@ -2,20 +2,20 @@ module TestSort using Base.Test using DataFrames - dv1 = @data([9, 1, 8, NA, 3, 3, 7, NA]) - dv2 = 1.0 * dv1 - dv3 = DataArray([1:8;]) - pdv1 = convert(PooledDataArray, dv1) + dv1 = NullableArray(Nullable{Int}[9, 1, 8, Nullable(), 3, 3, 7, Nullable()]) + dv2 = NullableArray(Nullable{Int}[9, 1, 8, Nullable(), 3, 3, 7, Nullable()]) + dv3 = NullableArray(1:8) + cv1 = NullableCategoricalArray(dv1, ordered=true) - d = DataFrame(dv1 = dv1, dv2 = dv2, dv3 = dv3, pdv1 = pdv1) + d = DataFrame(dv1 = dv1, dv2 = dv2, dv3 = dv3, cv1 = cv1) @test sortperm(d) == sortperm(dv1) @test sortperm(d[[:dv3, :dv1]]) == sortperm(dv3) - @test sort(d, cols=:dv1)[:dv3] == sortperm(dv1) - @test sort(d, cols=:dv2)[:dv3] == sortperm(dv1) - @test sort(d, cols=:pdv1)[:dv3] == sortperm(dv1) - @test sort(d, cols=[:dv1, :pdv1])[:dv3] == sortperm(dv1) - @test sort(d, cols=[:dv1, :dv3])[:dv3] == sortperm(dv1) + @test 
isequal(sort(d, cols=:dv1)[:dv3], NullableArray(sortperm(dv1))) + @test isequal(sort(d, cols=:dv2)[:dv3], NullableArray(sortperm(dv1))) + @test isequal(sort(d, cols=:cv1)[:dv3], NullableArray(sortperm(dv1))) + @test isequal(sort(d, cols=[:dv1, :cv1])[:dv3], NullableArray(sortperm(dv1))) + @test isequal(sort(d, cols=[:dv1, :dv3])[:dv3], NullableArray(sortperm(dv1))) df = DataFrame(rank=rand(1:12, 1000), chrom=rand(1:24, 1000), @@ -33,13 +33,17 @@ module TestSort @test issorted(ds2, cols=(order(:rank, rev=true), :chrom, :pos)) @test issorted(ds2, rev=(true, false, false)) - @test ds2 == ds + @test isequal(ds2, ds) sort!(df, cols=(:rank, :chrom, :pos), rev=(true, false, false)) @test issorted(df, cols=(order(:rank, rev=true), :chrom, :pos)) @test issorted(df, rev=(true, false, false)) - @test df == ds - + @test isequal(df, ds) + # Check that columns that shares the same underlying array are only permuted once PR#1072 + df = DataFrame(a=[2,1]) + df[:b] = df[:a] + sort!(df, cols=:a) + @test df == DataFrame(a=[1,2],b=[1,2]) end diff --git a/test/statsmodel.jl b/test/statsmodel.jl index ca9dcab6f7..9d9e5cd3cc 100644 --- a/test/statsmodel.jl +++ b/test/statsmodel.jl @@ -33,12 +33,12 @@ d[:x4] = [17:20;] f = y ~ x1 * x2 m = fit(DummyMod, f, d) -@test model_response(m) == d[:y] +@test model_response(m) == Array(d[:y]) ## test prediction method ## vanilla StatsBase.predict(mod::DummyMod) = mod.x * mod.beta -@test predict(m) == [ ones(size(d,1)) d[:x1] d[:x2] d[:x1].*d[:x2] ] * collect(1:4) +@test predict(m) == [ ones(size(d,1)) Array(d[:x1]) Array(d[:x2]) Array(d[:x1]).*Array(d[:x2]) ] * collect(1:4) ## new data from matrix StatsBase.predict(mod::DummyMod, newX::Matrix) = newX * mod.beta @@ -46,10 +46,10 @@ mm = ModelMatrix(ModelFrame(f, d)) @test predict(m, mm.m) == mm.m * collect(1:4) ## new data from DataFrame (via ModelMatrix) -@test predict(m, d) == predict(m, mm.m) +@test isequal(predict(m, d), NullableArray(predict(m, mm.m))) d2 = deepcopy(d) -d2[3, :x1] = NA +d2[3, 
:x1] = Nullable() @test length(predict(m, d2)) == 4 ## test copying of names from Terms to CoefTable @@ -61,23 +61,23 @@ io = IOBuffer() show(io, m) ## with categorical variables -d[:x1p] = PooledDataArray(d[:x1]) +d[:x1p] = NullableCategoricalArray(d[:x1]) f2 = y ~ x1p m2 = fit(DummyMod, f2, d) @test coeftable(m2).rownms == ["(Intercept)", "x1p: 6", "x1p: 7", "x1p: 8"] ## predict w/ new data missing levels -@test predict(m2, d[2:4, :]) == predict(m2)[2:4] +@test isequal(predict(m2, d[2:4, :]), NullableArray(predict(m2)[2:4])) ## predict w/ new data with _extra_ levels (throws an error) d3 = deepcopy(d) d3[1, :x1] = 0 -d3[:x1p] = PooledDataArray(d3[:x1]) +d3[:x1p] = NullableCategoricalVector(d3[:x1]) @test_throws ArgumentError predict(m2, d3) ## fit with contrasts specified -d[:x2p] = PooledDataArray(d[:x2]) +d[:x2p] = NullableCategoricalVector(d[:x2]) f3 = y ~ x1p + x2p m3 = fit(DummyMod, f3, d) fit(DummyMod, f3, d, contrasts = Dict(:x1p => EffectsCoding())) diff --git a/test/utils.jl b/test/utils.jl index cc4e5bc931..9875fc4eb4 100644 --- a/test/utils.jl +++ b/test/utils.jl @@ -38,21 +38,21 @@ module TestUtils "Expected if Julia was not built from source.") end - @test DataFrames.countna([1:3;]) == 0 - - data = @data rand(20) - @test DataFrames.countna(data) == 0 - data[sample(1:20, 11, replace=false)] = NA - @test DataFrames.countna(data) == 11 - data[1:end] = NA - @test DataFrames.countna(data) == 20 - - pdata = @data sample(1:5, 20) - @test DataFrames.countna(pdata) == 0 - pdata[sample(1:20, 11, replace=false)] = NA - @test DataFrames.countna(pdata) == 11 - pdata[1:end] = NA - @test DataFrames.countna(pdata) == 20 + @test DataFrames.countnull([1:3;]) == 0 + + data = NullableArray(rand(20)) + @test DataFrames.countnull(data) == 0 + data[sample(1:20, 11, replace=false)] = Nullable() + @test DataFrames.countnull(data) == 11 + data[1:end] = Nullable() + @test DataFrames.countnull(data) == 20 + + pdata = NullableArray(sample(1:5, 20)) + @test 
DataFrames.countnull(pdata) == 0 + pdata[sample(1:20, 11, replace=false)] = Nullable() + @test DataFrames.countnull(pdata) == 11 + pdata[1:end] = Nullable() + @test DataFrames.countnull(pdata) == 20 funs = [mean, sum, var, x -> sum(x)] if string(funs[end]) == "(anonymous function)" # Julia < 0.5