From be2b4dd3b5b5a9612ee5903f8740546626ae0664 Mon Sep 17 00:00:00 2001 From: Minchul Lee Date: Fri, 16 Jul 2021 01:28:13 +0900 Subject: [PATCH] implemented (#24), fixed (#27) --- .github/workflows/pull_request_test.yml | 20 +- .github/workflows/release.yml | 20 +- Benchmark.vcxproj | 377 +++++++- Benchmark.vcxproj.filters | 23 +- BenchmarkMv.vcxproj | 370 +++++++- BenchmarkMv.vcxproj.filters | 2 +- EigenRand.sln | 146 +++- EigenRand/Core.h | 14 +- EigenRand/Dists/Basic.h | 206 ++++- EigenRand/Dists/NormalExp.h | 14 +- EigenRand/EigenRand | 4 +- EigenRand/Macro.h | 6 +- EigenRand/MorePacketMath.h | 1057 +---------------------- EigenRand/PacketFilter.h | 2 +- EigenRand/PacketRandomEngine.h | 4 +- EigenRand/RandUtils.h | 12 +- EigenRand/arch/AVX/MorePacketMath.h | 601 +++++++++++++ EigenRand/arch/NEON/MorePacketMath.h | 65 ++ EigenRand/arch/SSE/MorePacketMath.h | 487 +++++++++++ README.md | 5 + TestAccuracy.vcxproj | 366 +++++++- TestAccuracy.vcxproj.filters | 2 +- {test => benchmark}/accuracy.cpp | 95 +- {test => benchmark}/benchmark.cpp | 4 +- {test => benchmark}/benchmark_mv.cpp | 0 {test => benchmark}/comp_scipy.py | 0 doxygen/Doxyfile | 2 +- test/packages.config | 4 + test/test.cpp | 72 ++ test/test.vcxproj | 130 +++ 30 files changed, 2954 insertions(+), 1156 deletions(-) create mode 100644 EigenRand/arch/AVX/MorePacketMath.h create mode 100644 EigenRand/arch/NEON/MorePacketMath.h create mode 100644 EigenRand/arch/SSE/MorePacketMath.h rename {test => benchmark}/accuracy.cpp (99%) rename {test => benchmark}/benchmark.cpp (99%) rename {test => benchmark}/benchmark_mv.cpp (100%) rename {test => benchmark}/comp_scipy.py (100%) create mode 100644 test/packages.config create mode 100644 test/test.cpp create mode 100644 test/test.vcxproj diff --git a/.github/workflows/pull_request_test.yml b/.github/workflows/pull_request_test.yml index c4b0235..d875de0 100644 --- a/.github/workflows/pull_request_test.yml +++ b/.github/workflows/pull_request_test.yml @@ -26,20 +26,20 @@ jobs: mv eigen include - name: Build Bench run: | - g++ -std=c++11 -g -O3 -${{ matrix.arch }} -DNDEBUG -I./ -I./include -Wno-ignored-attributes test/benchmark.cpp -o bench.out + g++ -std=c++11 -g -O3 -${{ matrix.arch }} -DNDEBUG -I./ -I./include -Wno-ignored-attributes benchmark/benchmark.cpp -o bench.out - name: Run Bench run: | cat /proc/cpuinfo ./bench.out - name: Build BenchMv run: | - g++ -std=c++11 -g -O3 -${{ matrix.arch }} -DNDEBUG -I./ -I./include -Wno-ignored-attributes test/benchmark_mv.cpp -o benchmv.out + g++ -std=c++11 -g -O3 -${{ matrix.arch }} -DNDEBUG -I./ -I./include -Wno-ignored-attributes benchmark/benchmark_mv.cpp -o benchmv.out - name: Run BenchMv run: | ./benchmv.out - name: Build Accuracy run: | - g++ -std=c++11 -g -O3 -${{ matrix.arch }} -DNDEBUG -I./ -I./include -Wno-ignored-attributes test/accuracy.cpp -o accuracy.out + g++ -std=c++11 -g -O3 -${{ matrix.arch }} -DNDEBUG -I./ -I./include -Wno-ignored-attributes benchmark/accuracy.cpp -o accuracy.out - name: Run Accuracy run: | ./accuracy.out @@ -61,7 +61,7 @@ jobs: mv eigen-${{ matrix.eigenversion }} include - name: Build Bench run: | - g++ -std=c++11 -g -O3 -${{ matrix.arch }} -DNDEBUG -I./ -I./include -Wno-ignored-attributes test/benchmark.cpp -o bench.out + g++ -std=c++11 -g -O3 -${{ matrix.arch }} -DNDEBUG -I./ -I./include -Wno-ignored-attributes benchmark/benchmark.cpp -o bench.out - name: Run Bench continue-on-error: true run: | @@ -69,14 +69,14 @@ jobs: ./bench.out - name: Build BenchMv run: | - g++ -std=c++11 -g -O3 -${{ matrix.arch }} -DNDEBUG 
-I./ -I./include -Wno-ignored-attributes test/benchmark_mv.cpp -o benchmv.out + g++ -std=c++11 -g -O3 -${{ matrix.arch }} -DNDEBUG -I./ -I./include -Wno-ignored-attributes benchmark/benchmark_mv.cpp -o benchmv.out - name: Run BenchMv continue-on-error: true run: | ./benchmv.out - name: Build Accuracy run: | - g++ -std=c++11 -g -O3 -${{ matrix.arch }} -DNDEBUG -I./ -I./include -Wno-ignored-attributes test/accuracy.cpp -o accuracy.out + g++ -std=c++11 -g -O3 -${{ matrix.arch }} -DNDEBUG -I./ -I./include -Wno-ignored-attributes benchmark/accuracy.cpp -o accuracy.out - name: Run Accuracy run: | ./accuracy.out @@ -99,20 +99,20 @@ jobs: - uses: ilammy/msvc-dev-cmd@v1 - name: Build Bench run: | - cl.exe /O2 ${{ matrix.arch }} /I.\ /I.\include /D "NDEBUG" /Fe:bench.exe .\test\benchmark.cpp + cl.exe /O2 ${{ matrix.arch }} /I.\ /I.\include /D "NDEBUG" /Fe:bench.exe .\benchmark\benchmark.cpp - name: Run Bench run: | bash -c "cat /proc/cpuinfo" .\bench.exe - name: Build BenchMv run: | - cl.exe /O2 ${{ matrix.arch }} /I.\ /I.\include /D "NDEBUG" /Fe:benchmv.exe .\test\benchmark_mv.cpp + cl.exe /O2 ${{ matrix.arch }} /I.\ /I.\include /D "NDEBUG" /Fe:benchmv.exe .\benchmark\benchmark_mv.cpp - name: Run BenchMv run: | .\benchmv.exe - name: Build Accuracy run: | - cl.exe /O2 ${{ matrix.arch }} /I.\ /I.\include /D "NDEBUG" /Fe:accuracy.exe .\test\accuracy.cpp + cl.exe /O2 ${{ matrix.arch }} /I.\ /I.\include /D "NDEBUG" /Fe:accuracy.exe .\benchmark\accuracy.cpp - name: Run Accuracy run: | .\accuracy.exe @@ -134,4 +134,4 @@ jobs: - name: Test Reference run: | pip install scipy - python test/comp_scipy.py + python benchmark/comp_scipy.py diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 5be30d5..836ac88 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -33,10 +33,10 @@ jobs: - name: Test Reference run: | pip install scipy - python test/comp_scipy.py + python benchmark/comp_scipy.py - name: Build Bench run: | - g++ -std=c++11 -g -O3 -${{ matrix.arch }} -DNDEBUG -I./ -I./include -Wno-ignored-attributes test/benchmark.cpp -o bench.out + g++ -std=c++11 -g -O3 -${{ matrix.arch }} -DNDEBUG -I./ -I./include -Wno-ignored-attributes benchmark/benchmark.cpp -o bench.out - name: Run Bench run: | cat /proc/cpuinfo @@ -44,7 +44,7 @@ jobs: ./bench.out - name: Build BenchMv run: | - g++ -std=c++11 -g -O3 -${{ matrix.arch }} -DNDEBUG -I./ -I./include -Wno-ignored-attributes test/benchmark_mv.cpp -o benchmv.out + g++ -std=c++11 -g -O3 -${{ matrix.arch }} -DNDEBUG -I./ -I./include -Wno-ignored-attributes benchmark/benchmark_mv.cpp -o benchmv.out - name: Run BenchMv run: | ./benchmv.out @@ -72,10 +72,10 @@ jobs: - name: Test Reference run: | pip install scipy - python test/comp_scipy.py + python benchmark/comp_scipy.py - name: Build Bench run: | - g++ -std=c++11 -g -O3 -${{ matrix.arch }} -DNDEBUG -I./ -I./include -Wno-ignored-attributes test/benchmark.cpp -o bench.out + g++ -std=c++11 -g -O3 -${{ matrix.arch }} -DNDEBUG -I./ -I./include -Wno-ignored-attributes benchmark/benchmark.cpp -o bench.out - name: Run Bench continue-on-error: true run: | @@ -84,7 +84,7 @@ jobs: ./bench.out - name: Build BenchMv run: | - g++ -std=c++11 -g -O3 -${{ matrix.arch }} -DNDEBUG -I./ -I./include -Wno-ignored-attributes test/benchmark_mv.cpp -o benchmv.out + g++ -std=c++11 -g -O3 -${{ matrix.arch }} -DNDEBUG -I./ -I./include -Wno-ignored-attributes benchmark/benchmark_mv.cpp -o benchmv.out - name: Run BenchMv continue-on-error: true run: | @@ -113,18 +113,18 @@ jobs: - name: Test 
Reference run: | pip install scipy - python test/comp_scipy.py + python benchmark/comp_scipy.py - uses: ilammy/msvc-dev-cmd@v1 - name: Build Bench run: | - cl.exe /O2 ${{ matrix.arch }} /I.\ /I.\include /D "NDEBUG" /Fe:bench.exe .\test\benchmark.cpp + cl.exe /O2 ${{ matrix.arch }} /I.\ /I.\include /D "NDEBUG" /Fe:bench.exe .\benchmark\benchmark.cpp - name: Run Bench run: | bash -c "cat /proc/cpuinfo" .\bench.exe - name: Build BenchMv run: | - cl.exe /O2 ${{ matrix.arch }} /I.\ /I.\include /D "NDEBUG" /Fe:benchmv.exe .\test\benchmark_mv.cpp + cl.exe /O2 ${{ matrix.arch }} /I.\ /I.\include /D "NDEBUG" /Fe:benchmv.exe .\benchmark\benchmark_mv.cpp - name: Run BenchMv run: | .\benchmv.exe @@ -146,4 +146,4 @@ jobs: - name: Test Reference run: | pip install scipy - python test/comp_scipy.py + python benchmark/comp_scipy.py diff --git a/Benchmark.vcxproj b/Benchmark.vcxproj index 974047d..571ce67 100644 --- a/Benchmark.vcxproj +++ b/Benchmark.vcxproj @@ -1,10 +1,34 @@ + + Debug + ARM + + + Debug + ARM64 + Debug Win32 + + RelAVX2 + ARM + + + RelAVX2 + ARM64 + + + RelAVX + ARM + + + RelAVX + ARM64 + RelAVX Win32 @@ -21,6 +45,14 @@ RelAVX2 x64 + + RelNoVect + ARM + + + RelNoVect + ARM64 + RelNoVect Win32 @@ -29,6 +61,14 @@ RelNoVect x64 + + RelSSE2 + ARM + + + RelSSE2 + ARM64 + RelSSE2 Win32 @@ -43,9 +83,9 @@ - - - + + + @@ -68,6 +108,9 @@ + + + 15.0 {F45E39EE-2863-4550-8531-31723AD0BC09} @@ -117,6 +160,18 @@ v142 Unicode + + Application + true + v142 + Unicode + + + Application + true + v142 + Unicode + Application false @@ -124,6 +179,20 @@ true Unicode + + Application + false + v142 + true + Unicode + + + Application + false + v142 + true + Unicode + Application false @@ -131,6 +200,20 @@ true Unicode + + Application + false + v142 + true + Unicode + + + Application + false + v142 + true + Unicode + Application false @@ -138,6 +221,20 @@ true Unicode + + Application + false + v142 + true + Unicode + + + Application + false + v142 + true + Unicode + Application false @@ -145,6 +242,20 @@ true Unicode + + Application + false + v142 + true + Unicode + + + Application + false + v142 + true + Unicode + @@ -168,18 +279,48 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true @@ -189,6 +330,14 @@ true $(SolutionDir);E:\AddInclude;$(IncludePath) + + true + $(SolutionDir);E:\AddInclude;$(IncludePath) + + + true + $(SolutionDir);E:\AddInclude;$(IncludePath) + false $(SolutionDir);E:\AddInclude;$(IncludePath) @@ -209,18 +358,50 @@ false $(SolutionDir);E:\AddInclude;$(IncludePath) + + false + $(SolutionDir);E:\AddInclude;$(IncludePath) + + + false + $(SolutionDir);E:\AddInclude;$(IncludePath) + false $(SolutionDir);E:\AddInclude;$(IncludePath) + + false + $(SolutionDir);E:\AddInclude;$(IncludePath) + + + false + $(SolutionDir);E:\AddInclude;$(IncludePath) + false $(SolutionDir);E:\AddInclude;$(IncludePath) + + false + $(SolutionDir);E:\AddInclude;$(IncludePath) + + + false + $(SolutionDir);E:\AddInclude;$(IncludePath) + false $(SolutionDir);E:\AddInclude;$(IncludePath) + + false + $(SolutionDir);E:\AddInclude;$(IncludePath) + + + false + $(SolutionDir);E:\AddInclude;$(IncludePath) + @@ -252,6 +433,38 @@ true + + + + + Level3 + Disabled + true + USE_ADDON;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + AdvancedVectorExtensions2 + + + Console + true + + + + + + + Level3 + Disabled + true + USE_ADDON;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + AdvancedVectorExtensions2 + + + Console + true + + @@ -349,6 +562,46 @@ true + + + + + Level3 + MaxSpeed + true + true + true + 
NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + StreamingSIMDExtensions2 + + + Console + true + true + true + + + + + + + Level3 + MaxSpeed + true + true + true + NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + StreamingSIMDExtensions2 + + + Console + true + true + true + + @@ -368,6 +621,44 @@ true + + + + + Level3 + MaxSpeed + true + true + true + EIGEN_DONT_VECTORIZE;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + + + Console + true + true + true + + + + + + + Level3 + MaxSpeed + true + true + true + EIGEN_DONT_VECTORIZE;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + + + Console + true + true + true + + @@ -388,6 +679,46 @@ true + + + + + Level3 + MaxSpeed + true + true + true + NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + AdvancedVectorExtensions + + + Console + true + true + true + + + + + + + Level3 + MaxSpeed + true + true + true + NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + AdvancedVectorExtensions + + + Console + true + true + true + + @@ -408,6 +739,46 @@ true + + + + + Level3 + MaxSpeed + true + true + true + NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + AdvancedVectorExtensions2 + + + Console + true + true + true + + + + + + + Level3 + MaxSpeed + true + true + true + NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + AdvancedVectorExtensions2 + + + Console + true + true + true + + diff --git a/Benchmark.vcxproj.filters b/Benchmark.vcxproj.filters index 1711a81..4bf2f21 100644 --- a/Benchmark.vcxproj.filters +++ b/Benchmark.vcxproj.filters @@ -19,6 +19,18 @@ {9aa8288d-cadf-40d0-9665-a7e61ad37e47} + + {2dd5c265-07cd-4265-a1c4-fdd2908d029f} + + + {316761d9-7742-4fd3-b64d-4ccb959247d2} + + + {a01afb4b-04af-4be7-aa91-8670aff6591c} + + + {5ba81498-594f-45cf-90f7-230750bfecd4} + @@ -63,12 +75,21 @@ EigenRand\MvDists + + EigenRand\arch\SSE + + + EigenRand\arch\AVX + + + EigenRand\arch\NEON + - + Source Files diff --git a/BenchmarkMv.vcxproj b/BenchmarkMv.vcxproj index 38ceb6e..15e9539 100644 --- a/BenchmarkMv.vcxproj +++ b/BenchmarkMv.vcxproj @@ -1,10 +1,34 @@ + + Debug + ARM + + + Debug + ARM64 + Debug Win32 + + RelAVX2 + ARM + + + RelAVX2 + ARM64 + + + RelAVX + ARM + + + RelAVX + ARM64 + RelAVX Win32 @@ -21,6 +45,14 @@ RelAVX2 x64 + + RelNoVect + ARM + + + RelNoVect + ARM64 + RelNoVect Win32 @@ -29,6 +61,14 @@ RelNoVect x64 + + RelSSE2 + ARM + + + RelSSE2 + ARM64 + RelSSE2 Win32 @@ -46,7 +86,7 @@ - + 15.0 @@ -97,6 +137,18 @@ v142 Unicode + + Application + true + v142 + Unicode + + + Application + true + v142 + Unicode + Application false @@ -104,6 +156,20 @@ true Unicode + + Application + false + v142 + true + Unicode + + + Application + false + v142 + true + Unicode + Application false @@ -111,6 +177,20 @@ true Unicode + + Application + false + v142 + true + Unicode + + + Application + false + v142 + true + Unicode + Application false @@ -118,6 +198,20 @@ true Unicode + + Application + false + v142 + true + Unicode + + + Application + false + v142 + true + Unicode + Application false @@ -125,6 +219,20 @@ true Unicode + + Application + false + v142 + true + Unicode + + + Application + false + v142 + true + Unicode + @@ -148,18 +256,48 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true @@ -169,6 +307,14 @@ true $(SolutionDir);E:\AddInclude;$(IncludePath) + + true + $(SolutionDir);E:\AddInclude;$(IncludePath) + + + true + $(SolutionDir);E:\AddInclude;$(IncludePath) + false $(SolutionDir);E:\AddInclude;$(IncludePath) @@ -189,18 +335,50 @@ false $(SolutionDir);E:\AddInclude;$(IncludePath) + + false + 
$(SolutionDir);E:\AddInclude;$(IncludePath) + + + false + $(SolutionDir);E:\AddInclude;$(IncludePath) + false $(SolutionDir);E:\AddInclude;$(IncludePath) + + false + $(SolutionDir);E:\AddInclude;$(IncludePath) + + + false + $(SolutionDir);E:\AddInclude;$(IncludePath) + false $(SolutionDir);E:\AddInclude;$(IncludePath) + + false + $(SolutionDir);E:\AddInclude;$(IncludePath) + + + false + $(SolutionDir);E:\AddInclude;$(IncludePath) + false $(SolutionDir);E:\AddInclude;$(IncludePath) + + false + $(SolutionDir);E:\AddInclude;$(IncludePath) + + + false + $(SolutionDir);E:\AddInclude;$(IncludePath) + @@ -232,6 +410,38 @@ true + + + + + Level3 + Disabled + true + USE_ADDON;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + AdvancedVectorExtensions2 + + + Console + true + + + + + + + Level3 + Disabled + true + USE_ADDON;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + AdvancedVectorExtensions2 + + + Console + true + + @@ -329,6 +539,46 @@ true + + + + + Level3 + MaxSpeed + true + true + true + NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + StreamingSIMDExtensions2 + + + Console + true + true + true + + + + + + + Level3 + MaxSpeed + true + true + true + NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + StreamingSIMDExtensions2 + + + Console + true + true + true + + @@ -348,6 +598,44 @@ true + + + + + Level3 + MaxSpeed + true + true + true + EIGEN_DONT_VECTORIZE;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + + + Console + true + true + true + + + + + + + Level3 + MaxSpeed + true + true + true + EIGEN_DONT_VECTORIZE;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + + + Console + true + true + true + + @@ -368,6 +656,46 @@ true + + + + + Level3 + MaxSpeed + true + true + true + NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + AdvancedVectorExtensions + + + Console + true + true + true + + + + + + + Level3 + MaxSpeed + true + true + true + NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + AdvancedVectorExtensions + + + Console + true + true + true + + @@ -388,6 +716,46 @@ true + + + + + Level3 + MaxSpeed + true + true + true + NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + AdvancedVectorExtensions2 + + + Console + true + true + true + + + + + + + Level3 + MaxSpeed + true + true + true + NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + AdvancedVectorExtensions2 + + + Console + true + true + true + + diff --git a/BenchmarkMv.vcxproj.filters b/BenchmarkMv.vcxproj.filters index 168a64f..b615f7d 100644 --- a/BenchmarkMv.vcxproj.filters +++ b/BenchmarkMv.vcxproj.filters @@ -14,7 +14,7 @@ - + Source Files diff --git a/EigenRand.sln b/EigenRand.sln index 862c2f0..d8f4ca9 100644 --- a/EigenRand.sln +++ b/EigenRand.sln @@ -5,84 +5,228 @@ VisualStudioVersion = 16.0.30517.126 MinimumVisualStudioVersion = 10.0.40219.1 Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "Benchmark", "Benchmark.vcxproj", "{F45E39EE-2863-4550-8531-31723AD0BC09}" EndProject -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "TestAccuracy", "TestAccuracy.vcxproj", "{8A7F8C9A-2E06-4767-8BBA-E1DE1CB341AB}" +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "Accuracy", "TestAccuracy.vcxproj", "{8A7F8C9A-2E06-4767-8BBA-E1DE1CB341AB}" EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "BenchmarkMv", "BenchmarkMv.vcxproj", "{85BACD1D-1D4F-4084-8C6A-5C3AC938FE50}" EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "test", "test\test.vcxproj", "{643D8602-FE0D-4EAF-841C-E690EE6E53FD}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|ARM = Debug|ARM 
+ Debug|ARM64 = Debug|ARM64 Debug|x64 = Debug|x64 Debug|x86 = Debug|x86 + RelAVX|ARM = RelAVX|ARM + RelAVX|ARM64 = RelAVX|ARM64 RelAVX|x64 = RelAVX|x64 RelAVX|x86 = RelAVX|x86 + RelAVX2|ARM = RelAVX2|ARM + RelAVX2|ARM64 = RelAVX2|ARM64 RelAVX2|x64 = RelAVX2|x64 RelAVX2|x86 = RelAVX2|x86 + Release|ARM = Release|ARM + Release|ARM64 = Release|ARM64 + Release|x64 = Release|x64 + Release|x86 = Release|x86 + RelNoVect|ARM = RelNoVect|ARM + RelNoVect|ARM64 = RelNoVect|ARM64 RelNoVect|x64 = RelNoVect|x64 RelNoVect|x86 = RelNoVect|x86 + RelSSE2|ARM = RelSSE2|ARM + RelSSE2|ARM64 = RelSSE2|ARM64 RelSSE2|x64 = RelSSE2|x64 RelSSE2|x86 = RelSSE2|x86 EndGlobalSection GlobalSection(ProjectConfigurationPlatforms) = postSolution + {F45E39EE-2863-4550-8531-31723AD0BC09}.Debug|ARM.ActiveCfg = Debug|ARM + {F45E39EE-2863-4550-8531-31723AD0BC09}.Debug|ARM.Build.0 = Debug|ARM + {F45E39EE-2863-4550-8531-31723AD0BC09}.Debug|ARM64.ActiveCfg = Debug|ARM64 + {F45E39EE-2863-4550-8531-31723AD0BC09}.Debug|ARM64.Build.0 = Debug|ARM64 {F45E39EE-2863-4550-8531-31723AD0BC09}.Debug|x64.ActiveCfg = Debug|x64 {F45E39EE-2863-4550-8531-31723AD0BC09}.Debug|x64.Build.0 = Debug|x64 {F45E39EE-2863-4550-8531-31723AD0BC09}.Debug|x86.ActiveCfg = Debug|Win32 {F45E39EE-2863-4550-8531-31723AD0BC09}.Debug|x86.Build.0 = Debug|Win32 + {F45E39EE-2863-4550-8531-31723AD0BC09}.RelAVX|ARM.ActiveCfg = RelAVX|ARM + {F45E39EE-2863-4550-8531-31723AD0BC09}.RelAVX|ARM.Build.0 = RelAVX|ARM + {F45E39EE-2863-4550-8531-31723AD0BC09}.RelAVX|ARM64.ActiveCfg = RelAVX|ARM64 + {F45E39EE-2863-4550-8531-31723AD0BC09}.RelAVX|ARM64.Build.0 = RelAVX|ARM64 {F45E39EE-2863-4550-8531-31723AD0BC09}.RelAVX|x64.ActiveCfg = RelAVX|x64 {F45E39EE-2863-4550-8531-31723AD0BC09}.RelAVX|x64.Build.0 = RelAVX|x64 {F45E39EE-2863-4550-8531-31723AD0BC09}.RelAVX|x86.ActiveCfg = RelAVX|Win32 {F45E39EE-2863-4550-8531-31723AD0BC09}.RelAVX|x86.Build.0 = RelAVX|Win32 + {F45E39EE-2863-4550-8531-31723AD0BC09}.RelAVX2|ARM.ActiveCfg = RelAVX2|ARM + {F45E39EE-2863-4550-8531-31723AD0BC09}.RelAVX2|ARM.Build.0 = RelAVX2|ARM + {F45E39EE-2863-4550-8531-31723AD0BC09}.RelAVX2|ARM64.ActiveCfg = RelAVX2|ARM64 + {F45E39EE-2863-4550-8531-31723AD0BC09}.RelAVX2|ARM64.Build.0 = RelAVX2|ARM64 {F45E39EE-2863-4550-8531-31723AD0BC09}.RelAVX2|x64.ActiveCfg = RelAVX2|x64 {F45E39EE-2863-4550-8531-31723AD0BC09}.RelAVX2|x64.Build.0 = RelAVX2|x64 {F45E39EE-2863-4550-8531-31723AD0BC09}.RelAVX2|x86.ActiveCfg = RelAVX2|Win32 {F45E39EE-2863-4550-8531-31723AD0BC09}.RelAVX2|x86.Build.0 = RelAVX2|Win32 + {F45E39EE-2863-4550-8531-31723AD0BC09}.Release|ARM.ActiveCfg = RelSSE2|ARM + {F45E39EE-2863-4550-8531-31723AD0BC09}.Release|ARM.Build.0 = RelSSE2|ARM + {F45E39EE-2863-4550-8531-31723AD0BC09}.Release|ARM64.ActiveCfg = RelSSE2|ARM64 + {F45E39EE-2863-4550-8531-31723AD0BC09}.Release|ARM64.Build.0 = RelSSE2|ARM64 + {F45E39EE-2863-4550-8531-31723AD0BC09}.Release|x64.ActiveCfg = RelSSE2|x64 + {F45E39EE-2863-4550-8531-31723AD0BC09}.Release|x64.Build.0 = RelSSE2|x64 + {F45E39EE-2863-4550-8531-31723AD0BC09}.Release|x86.ActiveCfg = RelSSE2|Win32 + {F45E39EE-2863-4550-8531-31723AD0BC09}.Release|x86.Build.0 = RelSSE2|Win32 + {F45E39EE-2863-4550-8531-31723AD0BC09}.RelNoVect|ARM.ActiveCfg = RelNoVect|ARM + {F45E39EE-2863-4550-8531-31723AD0BC09}.RelNoVect|ARM.Build.0 = RelNoVect|ARM + {F45E39EE-2863-4550-8531-31723AD0BC09}.RelNoVect|ARM64.ActiveCfg = RelNoVect|ARM64 + {F45E39EE-2863-4550-8531-31723AD0BC09}.RelNoVect|ARM64.Build.0 = RelNoVect|ARM64 {F45E39EE-2863-4550-8531-31723AD0BC09}.RelNoVect|x64.ActiveCfg = RelNoVect|x64 
{F45E39EE-2863-4550-8531-31723AD0BC09}.RelNoVect|x64.Build.0 = RelNoVect|x64 {F45E39EE-2863-4550-8531-31723AD0BC09}.RelNoVect|x86.ActiveCfg = RelNoVect|Win32 {F45E39EE-2863-4550-8531-31723AD0BC09}.RelNoVect|x86.Build.0 = RelNoVect|Win32 + {F45E39EE-2863-4550-8531-31723AD0BC09}.RelSSE2|ARM.ActiveCfg = RelSSE2|ARM + {F45E39EE-2863-4550-8531-31723AD0BC09}.RelSSE2|ARM.Build.0 = RelSSE2|ARM + {F45E39EE-2863-4550-8531-31723AD0BC09}.RelSSE2|ARM64.ActiveCfg = RelSSE2|ARM64 + {F45E39EE-2863-4550-8531-31723AD0BC09}.RelSSE2|ARM64.Build.0 = RelSSE2|ARM64 {F45E39EE-2863-4550-8531-31723AD0BC09}.RelSSE2|x64.ActiveCfg = RelSSE2|x64 {F45E39EE-2863-4550-8531-31723AD0BC09}.RelSSE2|x64.Build.0 = RelSSE2|x64 {F45E39EE-2863-4550-8531-31723AD0BC09}.RelSSE2|x86.ActiveCfg = RelSSE2|Win32 {F45E39EE-2863-4550-8531-31723AD0BC09}.RelSSE2|x86.Build.0 = RelSSE2|Win32 + {8A7F8C9A-2E06-4767-8BBA-E1DE1CB341AB}.Debug|ARM.ActiveCfg = Debug|ARM + {8A7F8C9A-2E06-4767-8BBA-E1DE1CB341AB}.Debug|ARM.Build.0 = Debug|ARM + {8A7F8C9A-2E06-4767-8BBA-E1DE1CB341AB}.Debug|ARM64.ActiveCfg = Debug|ARM64 + {8A7F8C9A-2E06-4767-8BBA-E1DE1CB341AB}.Debug|ARM64.Build.0 = Debug|ARM64 {8A7F8C9A-2E06-4767-8BBA-E1DE1CB341AB}.Debug|x64.ActiveCfg = Debug|x64 {8A7F8C9A-2E06-4767-8BBA-E1DE1CB341AB}.Debug|x64.Build.0 = Debug|x64 {8A7F8C9A-2E06-4767-8BBA-E1DE1CB341AB}.Debug|x86.ActiveCfg = Debug|Win32 {8A7F8C9A-2E06-4767-8BBA-E1DE1CB341AB}.Debug|x86.Build.0 = Debug|Win32 + {8A7F8C9A-2E06-4767-8BBA-E1DE1CB341AB}.RelAVX|ARM.ActiveCfg = RelAVX|ARM + {8A7F8C9A-2E06-4767-8BBA-E1DE1CB341AB}.RelAVX|ARM.Build.0 = RelAVX|ARM + {8A7F8C9A-2E06-4767-8BBA-E1DE1CB341AB}.RelAVX|ARM64.ActiveCfg = RelAVX|ARM64 + {8A7F8C9A-2E06-4767-8BBA-E1DE1CB341AB}.RelAVX|ARM64.Build.0 = RelAVX|ARM64 {8A7F8C9A-2E06-4767-8BBA-E1DE1CB341AB}.RelAVX|x64.ActiveCfg = RelAVX|x64 {8A7F8C9A-2E06-4767-8BBA-E1DE1CB341AB}.RelAVX|x64.Build.0 = RelAVX|x64 {8A7F8C9A-2E06-4767-8BBA-E1DE1CB341AB}.RelAVX|x86.ActiveCfg = RelAVX|Win32 {8A7F8C9A-2E06-4767-8BBA-E1DE1CB341AB}.RelAVX|x86.Build.0 = RelAVX|Win32 + {8A7F8C9A-2E06-4767-8BBA-E1DE1CB341AB}.RelAVX2|ARM.ActiveCfg = RelAVX2|ARM + {8A7F8C9A-2E06-4767-8BBA-E1DE1CB341AB}.RelAVX2|ARM.Build.0 = RelAVX2|ARM + {8A7F8C9A-2E06-4767-8BBA-E1DE1CB341AB}.RelAVX2|ARM64.ActiveCfg = RelAVX2|ARM64 + {8A7F8C9A-2E06-4767-8BBA-E1DE1CB341AB}.RelAVX2|ARM64.Build.0 = RelAVX2|ARM64 {8A7F8C9A-2E06-4767-8BBA-E1DE1CB341AB}.RelAVX2|x64.ActiveCfg = RelAVX2|x64 {8A7F8C9A-2E06-4767-8BBA-E1DE1CB341AB}.RelAVX2|x64.Build.0 = RelAVX2|x64 {8A7F8C9A-2E06-4767-8BBA-E1DE1CB341AB}.RelAVX2|x86.ActiveCfg = RelAVX2|Win32 {8A7F8C9A-2E06-4767-8BBA-E1DE1CB341AB}.RelAVX2|x86.Build.0 = RelAVX2|Win32 + {8A7F8C9A-2E06-4767-8BBA-E1DE1CB341AB}.Release|ARM.ActiveCfg = RelSSE2|ARM + {8A7F8C9A-2E06-4767-8BBA-E1DE1CB341AB}.Release|ARM.Build.0 = RelSSE2|ARM + {8A7F8C9A-2E06-4767-8BBA-E1DE1CB341AB}.Release|ARM64.ActiveCfg = RelSSE2|ARM64 + {8A7F8C9A-2E06-4767-8BBA-E1DE1CB341AB}.Release|ARM64.Build.0 = RelSSE2|ARM64 + {8A7F8C9A-2E06-4767-8BBA-E1DE1CB341AB}.Release|x64.ActiveCfg = RelSSE2|x64 + {8A7F8C9A-2E06-4767-8BBA-E1DE1CB341AB}.Release|x64.Build.0 = RelSSE2|x64 + {8A7F8C9A-2E06-4767-8BBA-E1DE1CB341AB}.Release|x86.ActiveCfg = RelSSE2|Win32 + {8A7F8C9A-2E06-4767-8BBA-E1DE1CB341AB}.Release|x86.Build.0 = RelSSE2|Win32 + {8A7F8C9A-2E06-4767-8BBA-E1DE1CB341AB}.RelNoVect|ARM.ActiveCfg = RelNoVect|ARM + {8A7F8C9A-2E06-4767-8BBA-E1DE1CB341AB}.RelNoVect|ARM.Build.0 = RelNoVect|ARM + {8A7F8C9A-2E06-4767-8BBA-E1DE1CB341AB}.RelNoVect|ARM64.ActiveCfg = RelNoVect|ARM64 + 
{8A7F8C9A-2E06-4767-8BBA-E1DE1CB341AB}.RelNoVect|ARM64.Build.0 = RelNoVect|ARM64 {8A7F8C9A-2E06-4767-8BBA-E1DE1CB341AB}.RelNoVect|x64.ActiveCfg = RelNoVect|x64 {8A7F8C9A-2E06-4767-8BBA-E1DE1CB341AB}.RelNoVect|x64.Build.0 = RelNoVect|x64 {8A7F8C9A-2E06-4767-8BBA-E1DE1CB341AB}.RelNoVect|x86.ActiveCfg = RelNoVect|Win32 {8A7F8C9A-2E06-4767-8BBA-E1DE1CB341AB}.RelNoVect|x86.Build.0 = RelNoVect|Win32 + {8A7F8C9A-2E06-4767-8BBA-E1DE1CB341AB}.RelSSE2|ARM.ActiveCfg = RelSSE2|ARM + {8A7F8C9A-2E06-4767-8BBA-E1DE1CB341AB}.RelSSE2|ARM.Build.0 = RelSSE2|ARM + {8A7F8C9A-2E06-4767-8BBA-E1DE1CB341AB}.RelSSE2|ARM64.ActiveCfg = RelSSE2|ARM64 + {8A7F8C9A-2E06-4767-8BBA-E1DE1CB341AB}.RelSSE2|ARM64.Build.0 = RelSSE2|ARM64 {8A7F8C9A-2E06-4767-8BBA-E1DE1CB341AB}.RelSSE2|x64.ActiveCfg = RelSSE2|x64 {8A7F8C9A-2E06-4767-8BBA-E1DE1CB341AB}.RelSSE2|x64.Build.0 = RelSSE2|x64 {8A7F8C9A-2E06-4767-8BBA-E1DE1CB341AB}.RelSSE2|x86.ActiveCfg = RelSSE2|Win32 {8A7F8C9A-2E06-4767-8BBA-E1DE1CB341AB}.RelSSE2|x86.Build.0 = RelSSE2|Win32 + {85BACD1D-1D4F-4084-8C6A-5C3AC938FE50}.Debug|ARM.ActiveCfg = Debug|ARM + {85BACD1D-1D4F-4084-8C6A-5C3AC938FE50}.Debug|ARM.Build.0 = Debug|ARM + {85BACD1D-1D4F-4084-8C6A-5C3AC938FE50}.Debug|ARM64.ActiveCfg = Debug|ARM64 + {85BACD1D-1D4F-4084-8C6A-5C3AC938FE50}.Debug|ARM64.Build.0 = Debug|ARM64 {85BACD1D-1D4F-4084-8C6A-5C3AC938FE50}.Debug|x64.ActiveCfg = Debug|x64 {85BACD1D-1D4F-4084-8C6A-5C3AC938FE50}.Debug|x64.Build.0 = Debug|x64 {85BACD1D-1D4F-4084-8C6A-5C3AC938FE50}.Debug|x86.ActiveCfg = Debug|Win32 {85BACD1D-1D4F-4084-8C6A-5C3AC938FE50}.Debug|x86.Build.0 = Debug|Win32 + {85BACD1D-1D4F-4084-8C6A-5C3AC938FE50}.RelAVX|ARM.ActiveCfg = RelAVX|ARM + {85BACD1D-1D4F-4084-8C6A-5C3AC938FE50}.RelAVX|ARM.Build.0 = RelAVX|ARM + {85BACD1D-1D4F-4084-8C6A-5C3AC938FE50}.RelAVX|ARM64.ActiveCfg = RelAVX|ARM64 + {85BACD1D-1D4F-4084-8C6A-5C3AC938FE50}.RelAVX|ARM64.Build.0 = RelAVX|ARM64 {85BACD1D-1D4F-4084-8C6A-5C3AC938FE50}.RelAVX|x64.ActiveCfg = RelAVX|x64 {85BACD1D-1D4F-4084-8C6A-5C3AC938FE50}.RelAVX|x64.Build.0 = RelAVX|x64 {85BACD1D-1D4F-4084-8C6A-5C3AC938FE50}.RelAVX|x86.ActiveCfg = RelAVX|Win32 {85BACD1D-1D4F-4084-8C6A-5C3AC938FE50}.RelAVX|x86.Build.0 = RelAVX|Win32 + {85BACD1D-1D4F-4084-8C6A-5C3AC938FE50}.RelAVX2|ARM.ActiveCfg = RelAVX2|ARM + {85BACD1D-1D4F-4084-8C6A-5C3AC938FE50}.RelAVX2|ARM.Build.0 = RelAVX2|ARM + {85BACD1D-1D4F-4084-8C6A-5C3AC938FE50}.RelAVX2|ARM64.ActiveCfg = RelAVX2|ARM64 + {85BACD1D-1D4F-4084-8C6A-5C3AC938FE50}.RelAVX2|ARM64.Build.0 = RelAVX2|ARM64 {85BACD1D-1D4F-4084-8C6A-5C3AC938FE50}.RelAVX2|x64.ActiveCfg = RelAVX2|x64 {85BACD1D-1D4F-4084-8C6A-5C3AC938FE50}.RelAVX2|x64.Build.0 = RelAVX2|x64 {85BACD1D-1D4F-4084-8C6A-5C3AC938FE50}.RelAVX2|x86.ActiveCfg = RelAVX2|Win32 {85BACD1D-1D4F-4084-8C6A-5C3AC938FE50}.RelAVX2|x86.Build.0 = RelAVX2|Win32 + {85BACD1D-1D4F-4084-8C6A-5C3AC938FE50}.Release|ARM.ActiveCfg = RelSSE2|ARM + {85BACD1D-1D4F-4084-8C6A-5C3AC938FE50}.Release|ARM.Build.0 = RelSSE2|ARM + {85BACD1D-1D4F-4084-8C6A-5C3AC938FE50}.Release|ARM64.ActiveCfg = RelSSE2|ARM64 + {85BACD1D-1D4F-4084-8C6A-5C3AC938FE50}.Release|ARM64.Build.0 = RelSSE2|ARM64 + {85BACD1D-1D4F-4084-8C6A-5C3AC938FE50}.Release|x64.ActiveCfg = RelSSE2|x64 + {85BACD1D-1D4F-4084-8C6A-5C3AC938FE50}.Release|x64.Build.0 = RelSSE2|x64 + {85BACD1D-1D4F-4084-8C6A-5C3AC938FE50}.Release|x86.ActiveCfg = RelSSE2|Win32 + {85BACD1D-1D4F-4084-8C6A-5C3AC938FE50}.Release|x86.Build.0 = RelSSE2|Win32 + {85BACD1D-1D4F-4084-8C6A-5C3AC938FE50}.RelNoVect|ARM.ActiveCfg = RelNoVect|ARM + 
{85BACD1D-1D4F-4084-8C6A-5C3AC938FE50}.RelNoVect|ARM.Build.0 = RelNoVect|ARM + {85BACD1D-1D4F-4084-8C6A-5C3AC938FE50}.RelNoVect|ARM64.ActiveCfg = RelNoVect|ARM64 + {85BACD1D-1D4F-4084-8C6A-5C3AC938FE50}.RelNoVect|ARM64.Build.0 = RelNoVect|ARM64 {85BACD1D-1D4F-4084-8C6A-5C3AC938FE50}.RelNoVect|x64.ActiveCfg = RelNoVect|x64 {85BACD1D-1D4F-4084-8C6A-5C3AC938FE50}.RelNoVect|x64.Build.0 = RelNoVect|x64 {85BACD1D-1D4F-4084-8C6A-5C3AC938FE50}.RelNoVect|x86.ActiveCfg = RelNoVect|Win32 {85BACD1D-1D4F-4084-8C6A-5C3AC938FE50}.RelNoVect|x86.Build.0 = RelNoVect|Win32 + {85BACD1D-1D4F-4084-8C6A-5C3AC938FE50}.RelSSE2|ARM.ActiveCfg = RelSSE2|ARM + {85BACD1D-1D4F-4084-8C6A-5C3AC938FE50}.RelSSE2|ARM.Build.0 = RelSSE2|ARM + {85BACD1D-1D4F-4084-8C6A-5C3AC938FE50}.RelSSE2|ARM64.ActiveCfg = RelSSE2|ARM64 + {85BACD1D-1D4F-4084-8C6A-5C3AC938FE50}.RelSSE2|ARM64.Build.0 = RelSSE2|ARM64 {85BACD1D-1D4F-4084-8C6A-5C3AC938FE50}.RelSSE2|x64.ActiveCfg = RelSSE2|x64 {85BACD1D-1D4F-4084-8C6A-5C3AC938FE50}.RelSSE2|x64.Build.0 = RelSSE2|x64 {85BACD1D-1D4F-4084-8C6A-5C3AC938FE50}.RelSSE2|x86.ActiveCfg = RelSSE2|Win32 {85BACD1D-1D4F-4084-8C6A-5C3AC938FE50}.RelSSE2|x86.Build.0 = RelSSE2|Win32 + {643D8602-FE0D-4EAF-841C-E690EE6E53FD}.Debug|ARM.ActiveCfg = Debug|Win32 + {643D8602-FE0D-4EAF-841C-E690EE6E53FD}.Debug|ARM64.ActiveCfg = Debug|Win32 + {643D8602-FE0D-4EAF-841C-E690EE6E53FD}.Debug|x64.ActiveCfg = Debug|x64 + {643D8602-FE0D-4EAF-841C-E690EE6E53FD}.Debug|x64.Build.0 = Debug|x64 + {643D8602-FE0D-4EAF-841C-E690EE6E53FD}.Debug|x86.ActiveCfg = Debug|Win32 + {643D8602-FE0D-4EAF-841C-E690EE6E53FD}.Debug|x86.Build.0 = Debug|Win32 + {643D8602-FE0D-4EAF-841C-E690EE6E53FD}.RelAVX|ARM.ActiveCfg = Debug|Win32 + {643D8602-FE0D-4EAF-841C-E690EE6E53FD}.RelAVX|ARM.Build.0 = Debug|Win32 + {643D8602-FE0D-4EAF-841C-E690EE6E53FD}.RelAVX|ARM64.ActiveCfg = Debug|Win32 + {643D8602-FE0D-4EAF-841C-E690EE6E53FD}.RelAVX|ARM64.Build.0 = Debug|Win32 + {643D8602-FE0D-4EAF-841C-E690EE6E53FD}.RelAVX|x64.ActiveCfg = Release|x64 + {643D8602-FE0D-4EAF-841C-E690EE6E53FD}.RelAVX|x64.Build.0 = Release|x64 + {643D8602-FE0D-4EAF-841C-E690EE6E53FD}.RelAVX|x86.ActiveCfg = Release|Win32 + {643D8602-FE0D-4EAF-841C-E690EE6E53FD}.RelAVX|x86.Build.0 = Release|Win32 + {643D8602-FE0D-4EAF-841C-E690EE6E53FD}.RelAVX2|ARM.ActiveCfg = Debug|Win32 + {643D8602-FE0D-4EAF-841C-E690EE6E53FD}.RelAVX2|ARM.Build.0 = Debug|Win32 + {643D8602-FE0D-4EAF-841C-E690EE6E53FD}.RelAVX2|ARM64.ActiveCfg = Debug|Win32 + {643D8602-FE0D-4EAF-841C-E690EE6E53FD}.RelAVX2|ARM64.Build.0 = Debug|Win32 + {643D8602-FE0D-4EAF-841C-E690EE6E53FD}.RelAVX2|x64.ActiveCfg = Release|x64 + {643D8602-FE0D-4EAF-841C-E690EE6E53FD}.RelAVX2|x64.Build.0 = Release|x64 + {643D8602-FE0D-4EAF-841C-E690EE6E53FD}.RelAVX2|x86.ActiveCfg = Release|Win32 + {643D8602-FE0D-4EAF-841C-E690EE6E53FD}.RelAVX2|x86.Build.0 = Release|Win32 + {643D8602-FE0D-4EAF-841C-E690EE6E53FD}.Release|ARM.ActiveCfg = Release|Win32 + {643D8602-FE0D-4EAF-841C-E690EE6E53FD}.Release|ARM64.ActiveCfg = Release|Win32 + {643D8602-FE0D-4EAF-841C-E690EE6E53FD}.Release|x64.ActiveCfg = Release|x64 + {643D8602-FE0D-4EAF-841C-E690EE6E53FD}.Release|x64.Build.0 = Release|x64 + {643D8602-FE0D-4EAF-841C-E690EE6E53FD}.Release|x86.ActiveCfg = Release|Win32 + {643D8602-FE0D-4EAF-841C-E690EE6E53FD}.Release|x86.Build.0 = Release|Win32 + {643D8602-FE0D-4EAF-841C-E690EE6E53FD}.RelNoVect|ARM.ActiveCfg = Debug|Win32 + {643D8602-FE0D-4EAF-841C-E690EE6E53FD}.RelNoVect|ARM.Build.0 = Debug|Win32 + {643D8602-FE0D-4EAF-841C-E690EE6E53FD}.RelNoVect|ARM64.ActiveCfg = Debug|Win32 + 
{643D8602-FE0D-4EAF-841C-E690EE6E53FD}.RelNoVect|ARM64.Build.0 = Debug|Win32 + {643D8602-FE0D-4EAF-841C-E690EE6E53FD}.RelNoVect|x64.ActiveCfg = Release|x64 + {643D8602-FE0D-4EAF-841C-E690EE6E53FD}.RelNoVect|x64.Build.0 = Release|x64 + {643D8602-FE0D-4EAF-841C-E690EE6E53FD}.RelNoVect|x86.ActiveCfg = Release|Win32 + {643D8602-FE0D-4EAF-841C-E690EE6E53FD}.RelNoVect|x86.Build.0 = Release|Win32 + {643D8602-FE0D-4EAF-841C-E690EE6E53FD}.RelSSE2|ARM.ActiveCfg = Debug|Win32 + {643D8602-FE0D-4EAF-841C-E690EE6E53FD}.RelSSE2|ARM.Build.0 = Debug|Win32 + {643D8602-FE0D-4EAF-841C-E690EE6E53FD}.RelSSE2|ARM64.ActiveCfg = Debug|Win32 + {643D8602-FE0D-4EAF-841C-E690EE6E53FD}.RelSSE2|ARM64.Build.0 = Debug|Win32 + {643D8602-FE0D-4EAF-841C-E690EE6E53FD}.RelSSE2|x64.ActiveCfg = Release|x64 + {643D8602-FE0D-4EAF-841C-E690EE6E53FD}.RelSSE2|x64.Build.0 = Release|x64 + {643D8602-FE0D-4EAF-841C-E690EE6E53FD}.RelSSE2|x86.ActiveCfg = Release|Win32 + {643D8602-FE0D-4EAF-841C-E690EE6E53FD}.RelSSE2|x86.Build.0 = Release|Win32 EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE diff --git a/EigenRand/Core.h b/EigenRand/Core.h index dd23126..ad6c704 100644 --- a/EigenRand/Core.h +++ b/EigenRand/Core.h @@ -13,15 +13,15 @@ #ifndef EIGENRAND_CORE_H #define EIGENRAND_CORE_H -#include +#include "RandUtils.h" -#include -#include -#include -#include +#include "Dists/Basic.h" +#include "Dists/Discrete.h" +#include "Dists/NormalExp.h" +#include "Dists/GammaPoisson.h" -#include -#include +#include "MvDists/MvNormal.h" +#include "MvDists/Multinomial.h" namespace Eigen { diff --git a/EigenRand/Dists/Basic.h b/EigenRand/Dists/Basic.h index ccd609c..8022522 100644 --- a/EigenRand/Dists/Basic.h +++ b/EigenRand/Dists/Basic.h @@ -238,6 +238,31 @@ namespace Eigen using OptCacheStore = CacheStore; + template + struct ExtractFirstUint; + + template<> + struct ExtractFirstUint + { + template + auto operator()(Packet v) -> decltype(Eigen::internal::pfirst(v)) + { + return Eigen::internal::pfirst(v); + } + }; + + template<> + struct ExtractFirstUint + { + template + auto operator()(Packet v) -> uint64_t + { + uint64_t arr[sizeof(Packet) / 8]; + Eigen::internal::pstoreu((Packet*)arr, v); + return arr[0]; + } + }; + /** * @brief Generator of random bits for integral scalars * @@ -296,13 +321,52 @@ namespace Eigen } }; + /** + * @brief Generator of reals in a range `[a, b]` + * + * @tparam _Scalar any real type + */ + template + class Balanced2Gen : public GenBase, _Scalar> + { + static_assert(std::is_floating_point<_Scalar>::value, "balanced needs floating point types."); + _Scalar slope = 2, bias = -1; + public: + using Scalar = _Scalar; + + /** + * @brief Construct a new balanced generator + * + * @param _a,_b left and right boundary + */ + Balanced2Gen(_Scalar _a = -1, _Scalar _b = 1) + : slope{ _b - _a }, bias{ _a } + { + } + + template + EIGEN_STRONG_INLINE const _Scalar operator() (Rng&& rng) + { + using namespace Eigen::internal; + return ((_Scalar)((int32_t)pfirst(std::forward(rng)()) & 0x7FFFFFFF) / 0x7FFFFFFF) * slope + bias; + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(Rng&& rng) + { + using namespace Eigen::internal; + using RUtils = RandUtils; + return RUtils{}.balanced(std::forward(rng), slope, bias); + } + }; + /** * @brief Generator of reals in a range `[0, 1)` * * @tparam _Scalar any real type */ template - class UniformRealGen : public GenBase, _Scalar> + class StdUniformRealGen : public GenBase, _Scalar> { static_assert(std::is_floating_point<_Scalar>::value, 
"uniformReal needs floating point types."); @@ -313,14 +377,14 @@ namespace Eigen EIGEN_STRONG_INLINE const _Scalar operator() (Rng&& rng) { using namespace Eigen::internal; - return bit_scalar<_Scalar>{}.to_ur(pfirst(std::forward(rng)())); + return BitScalar<_Scalar>{}.to_ur(ExtractFirstUint<_Scalar>{}(std::forward(rng)())); } template EIGEN_STRONG_INLINE const _Scalar nzur_scalar(Rng&& rng) { using namespace Eigen::internal; - return bit_scalar<_Scalar>{}.to_nzur(pfirst(std::forward(rng)())); + return BitScalar<_Scalar>{}.to_nzur(ExtractFirstUint<_Scalar>{}(std::forward(rng)())); } template @@ -332,6 +396,44 @@ namespace Eigen } }; + template + class UniformRealGen : public GenBase, _Scalar> + { + static_assert(std::is_floating_point<_Scalar>::value, "uniformReal needs floating point types."); + _Scalar bias, slope; + + public: + using Scalar = _Scalar; + + UniformRealGen(_Scalar _min = 0, _Scalar _max = 1) + : bias{ _min }, slope{ _max - _min } + { + } + + UniformRealGen(const UniformRealGen&) = default; + UniformRealGen(UniformRealGen&&) = default; + + UniformRealGen& operator=(const UniformRealGen&) = default; + UniformRealGen& operator=(UniformRealGen&&) = default; + + template + EIGEN_STRONG_INLINE const _Scalar operator() (Rng&& rng) + { + using namespace Eigen::internal; + return bias + BitScalar<_Scalar>{}.to_ur(pfirst(std::forward(rng)())) * slope; + } + + template + EIGEN_STRONG_INLINE const Packet packetOp(Rng&& rng) + { + using namespace Eigen::internal; + using RUtils = RandUtils; + return padd(pmul( + RUtils{}.uniform_real(std::forward(rng)), pset1(slope) + ), pset1(bias)); + } + }; + /** * @brief Generator of Bernoulli distribution @@ -468,7 +570,53 @@ namespace Eigen } template - using UniformRealType = CwiseNullaryOp, typename Derived::Scalar, Urng, true>, const Derived>; + using Balanced2Type = CwiseNullaryOp, typename Derived::Scalar, Urng, true>, const Derived>; + + /** + * @brief generates reals in a range `[a, b]` + * + * @tparam Derived a type of Eigen::DenseBase + * @tparam Urng + * @param rows the number of rows being generated + * @param cols the number of columns being generated + * @param urng c++11-style random number generator + * @param a,b left and right boundary + * @return a random matrix expression with a shape (`rows`, `cols`) + * + * @see Eigen::Rand::BalancedGen + */ + template + inline const Balanced2Type + balanced(Index rows, Index cols, Urng&& urng, typename Derived::Scalar a, typename Derived::Scalar b) + { + return { + rows, cols, { std::forward(urng), Balanced2Gen{a, b} } + }; + } + + /** + * @brief generates reals in a range `[a, b]` + * + * @tparam Derived + * @tparam Urng + * @param o an instance of any type of Eigen::DenseBase + * @param urng c++11-style random number generator + * @param a,b left and right boundary + * @return a random matrix expression of the same shape as `o` + * + * @see Eigen::Rand::BalancedGen + */ + template + inline const Balanced2Type + balancedLike(const Derived& o, Urng&& urng, typename Derived::Scalar a, typename Derived::Scalar b) + { + return { + o.rows(), o.cols(), { std::forward(urng), Balanced2Gen{a, b} } + }; + } + + template + using StdUniformRealType = CwiseNullaryOp, typename Derived::Scalar, Urng, true>, const Derived>; /** * @brief generates reals in a range `[0, 1)` @@ -483,7 +631,7 @@ namespace Eigen * @see Eigen::Rand::UniformRealGen */ template - inline const UniformRealType + inline const StdUniformRealType uniformReal(Index rows, Index cols, Urng&& urng) { return { @@ -503,7 +651,7 @@ namespace 
Eigen * @see Eigen::Rand::UniformRealGen */ template - inline const UniformRealType + inline const StdUniformRealType uniformRealLike(Derived& o, Urng&& urng) { return { @@ -511,6 +659,52 @@ namespace Eigen }; } + template + using UniformRealType = CwiseNullaryOp, typename Derived::Scalar, Urng, true>, const Derived>; + + /** + * @brief generates reals in a range `[min, max)` + * + * @tparam Derived a type of Eigen::DenseBase + * @tparam Urng + * @param rows the number of rows being generated + * @param cols the number of columns being generated + * @param urng c++11-style random number generator + * @param min, max the range of reals being generated + * @return a random matrix expression with a shape (`rows`, `cols`) + * + * @see Eigen::Rand::UniformRealGen + */ + template + inline const UniformRealType + uniformReal(Index rows, Index cols, Urng&& urng, typename Derived::Scalar min, typename Derived::Scalar max) + { + return { + rows, cols, { std::forward(urng), UniformRealGen{ min, max } } + }; + } + + /** + * @brief generates reals in a range `[min, max)` + * + * @tparam Derived + * @tparam Urng + * @param o an instance of any type of Eigen::DenseBase + * @param urng c++11-style random number generator + * @param min, max the range of reals being generated + * @return a random matrix expression of the same shape as `o` + * + * @see Eigen::Rand::UniformRealGen + */ + template + inline const UniformRealType + uniformRealLike(Derived& o, Urng&& urng, typename Derived::Scalar min, typename Derived::Scalar max) + { + return { + o.rows(), o.cols(), { std::forward(urng), UniformRealGen{ min, max } } + }; + } + template using BernoulliType = CwiseNullaryOp, typename Derived::Scalar, Urng, true>, const Derived>; diff --git a/EigenRand/Dists/NormalExp.h b/EigenRand/Dists/NormalExp.h index 6e90a98..d6c9d07 100644 --- a/EigenRand/Dists/NormalExp.h +++ b/EigenRand/Dists/NormalExp.h @@ -27,7 +27,7 @@ namespace Eigen { static_assert(std::is_floating_point<_Scalar>::value, "normalDist needs floating point types."); bool valid = false; - UniformRealGen<_Scalar> ur; + StdUniformRealGen<_Scalar> ur; public: using Scalar = _Scalar; @@ -186,7 +186,7 @@ namespace Eigen class StudentTGen : public GenBase, _Scalar> { static_assert(std::is_floating_point<_Scalar>::value, "studentT needs floating point types."); - UniformRealGen<_Scalar> ur; + StdUniformRealGen<_Scalar> ur; _Scalar n; public: @@ -256,7 +256,7 @@ namespace Eigen { friend GammaGen<_Scalar>; static_assert(std::is_floating_point<_Scalar>::value, "expDist needs floating point types."); - UniformRealGen<_Scalar> ur; + StdUniformRealGen<_Scalar> ur; _Scalar lambda = 1; public: @@ -481,7 +481,7 @@ namespace Eigen class WeibullGen : public GenBase, _Scalar> { static_assert(std::is_floating_point<_Scalar>::value, "weilbullDist needs floating point types."); - UniformRealGen<_Scalar> ur; + StdUniformRealGen<_Scalar> ur; _Scalar a = 1, b = 1; public: @@ -530,7 +530,7 @@ namespace Eigen class ExtremeValueGen : public GenBase, _Scalar> { static_assert(std::is_floating_point<_Scalar>::value, "extremeValueDist needs floating point types."); - UniformRealGen<_Scalar> ur; + StdUniformRealGen<_Scalar> ur; _Scalar a = 0, b = 1; public: @@ -622,7 +622,7 @@ namespace Eigen class CauchyGen : public GenBase, _Scalar> { static_assert(std::is_floating_point<_Scalar>::value, "cauchyDist needs floating point types."); - UniformRealGen<_Scalar> ur; + StdUniformRealGen<_Scalar> ur; _Scalar a = 0, b = 1; public: @@ -679,7 +679,7 @@ namespace Eigen friend FisherFGen<_Scalar>; 
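// Illustrative usage of the range-based factory functions added above in Dists/Basic.h.
// This is a minimal sketch, not taken from the patch itself: the matrix type, the seed,
// and the choice of std::mt19937_64 as the "c++11-style random number generator" are assumptions.
//
//   #include <Eigen/Dense>
//   #include <EigenRand/EigenRand>
//   #include <random>
//
//   std::mt19937_64 urng{ 42 };
//   // reals in [-2, 3), backed by the new UniformRealGen<float>{ min, max }
//   Eigen::ArrayXXf u = Eigen::Rand::uniformReal<Eigen::ArrayXXf>(4, 4, urng, -2.0f, 3.0f);
//   // reals in [1, 2], backed by the new Balanced2Gen<float>{ a, b }
//   Eigen::ArrayXXf b = Eigen::Rand::balanced<Eigen::ArrayXXf>(4, 4, urng, 1.0f, 2.0f);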
static_assert(std::is_floating_point<_Scalar>::value, "betaDist needs floating point types."); int cache_rest_cnt = 0; - UniformRealGen<_Scalar> ur; + StdUniformRealGen<_Scalar> ur; _Scalar a, b; GammaGen<_Scalar> gd1, gd2; diff --git a/EigenRand/EigenRand b/EigenRand/EigenRand index 28d218c..b278786 100644 --- a/EigenRand/EigenRand +++ b/EigenRand/EigenRand @@ -13,7 +13,7 @@ #define EIGENRAND_EIGENRAND_H #include -#include -#include +#include "Macro.h" +#include "Core.h" #endif \ No newline at end of file diff --git a/EigenRand/Macro.h b/EigenRand/Macro.h index 2836f6a..18cf375 100644 --- a/EigenRand/Macro.h +++ b/EigenRand/Macro.h @@ -2,8 +2,8 @@ * @file Macro.h * @author bab2min (bab2min@gmail.com) * @brief - * @version 0.3.4 - * @date 2021-04-25 + * @version 0.3.5 + * @date 2021-07-16 * * @copyright Copyright (c) 2020-2021 * @@ -14,7 +14,7 @@ #define EIGENRAND_WORLD_VERSION 0 #define EIGENRAND_MAJOR_VERSION 3 -#define EIGENRAND_MINOR_VERSION 4 +#define EIGENRAND_MINOR_VERSION 5 #if EIGEN_VERSION_AT_LEAST(3,3,4) #else diff --git a/EigenRand/MorePacketMath.h b/EigenRand/MorePacketMath.h index 3558dc3..61a0404 100644 --- a/EigenRand/MorePacketMath.h +++ b/EigenRand/MorePacketMath.h @@ -30,43 +30,8 @@ namespace Eigen template struct HalfPacket; -#ifdef EIGEN_VECTORIZE_AVX2 - template<> - struct IsIntPacket : std::true_type {}; - - template<> - struct HalfPacket - { - using type = Packet4i; - }; -#endif -#ifdef EIGEN_VECTORIZE_AVX - template<> - struct IsFloatPacket : std::true_type {}; - - template<> - struct IsDoublePacket : std::true_type {}; -#endif -#ifdef EIGEN_VECTORIZE_SSE2 - template<> - struct IsIntPacket : std::true_type {}; - - template<> - struct IsFloatPacket : std::true_type {}; - - template<> - struct IsDoublePacket : std::true_type {}; - - template<> - struct HalfPacket - { - using type = uint64_t; - }; -#endif template - struct reinterpreter - { - }; + struct reinterpreter{}; template inline auto reinterpret_to_float(const Packet& x) @@ -89,6 +54,9 @@ namespace Eigen return reinterpreter{}.to_int(x); } + template + EIGEN_STRONG_INLINE void split_two(const Packet& p, typename HalfPacket::type& a, typename HalfPacket::type& b); + template EIGEN_STRONG_INLINE Packet pseti64(uint64_t a); @@ -222,10 +190,10 @@ namespace Eigen } template - struct bit_scalar; + struct BitScalar; template<> - struct bit_scalar + struct BitScalar { float to_ur(uint32_t x) { @@ -245,7 +213,7 @@ namespace Eigen }; template<> - struct bit_scalar + struct BitScalar { double to_ur(uint64_t x) { @@ -272,7 +240,7 @@ namespace Eigen EIGEN_STRONG_INLINE float2 bit_to_ur_float(uint64_t x) { - bit_scalar bs; + BitScalar bs; float2 ret; ret.f[0] = bs.to_ur(x & 0xFFFFFFFF); ret.f[1] = bs.to_ur(x >> 32); @@ -566,1017 +534,16 @@ namespace Eigen } #ifdef EIGEN_VECTORIZE_AVX -#include - -namespace Eigen -{ - namespace internal - { - template<> - struct reinterpreter - { - EIGEN_STRONG_INLINE Packet8f to_float(const Packet8i& x) - { - return _mm256_castsi256_ps(x); - } - - EIGEN_STRONG_INLINE Packet4d to_double(const Packet8i& x) - { - return _mm256_castsi256_pd(x); - } - - EIGEN_STRONG_INLINE Packet8i to_int(const Packet8i& x) - { - return x; - } - }; - - template<> - struct reinterpreter - { - EIGEN_STRONG_INLINE Packet8f to_float(const Packet8f& x) - { - return x; - } - - EIGEN_STRONG_INLINE Packet4d to_double(const Packet8f& x) - { - return _mm256_castps_pd(x); - } - - EIGEN_STRONG_INLINE Packet8i to_int(const Packet8f& x) - { - return _mm256_castps_si256(x); - } - }; - - template<> - struct reinterpreter - { - 
EIGEN_STRONG_INLINE Packet8f to_float(const Packet4d& x) - { - return _mm256_castpd_ps(x); - } - - EIGEN_STRONG_INLINE Packet4d to_double(const Packet4d& x) - { - return x; - } - - EIGEN_STRONG_INLINE Packet8i to_int(const Packet4d& x) - { - return _mm256_castpd_si256(x); - } - }; - - EIGEN_STRONG_INLINE void split_two(const Packet8i& x, Packet4i& a, Packet4i& b) - { - a = _mm256_extractf128_si256(x, 0); - b = _mm256_extractf128_si256(x, 1); - } - - EIGEN_STRONG_INLINE Packet8i combine_two(const Packet4i& a, const Packet4i& b) - { - return _mm256_insertf128_si256(_mm256_castsi128_si256(a), b, 1); - } - - EIGEN_STRONG_INLINE void split_two(const Packet8f& x, Packet4f& a, Packet4f& b) - { - a = _mm256_extractf128_ps(x, 0); - b = _mm256_extractf128_ps(x, 1); - } - - EIGEN_STRONG_INLINE Packet8f combine_two(const Packet4f& a, const Packet4f& b) - { - return _mm256_insertf128_ps(_mm256_castps128_ps256(a), b, 1); - } - - - EIGEN_STRONG_INLINE Packet4i combine_low32(const Packet8i& a) - { -#ifdef EIGEN_VECTORIZE_AVX2 - return _mm256_castsi256_si128(_mm256_permutevar8x32_epi32(a, _mm256_setr_epi32(0, 2, 4, 6, 1, 3, 5, 7))); -#else - auto sc = _mm256_permutevar_ps(_mm256_castsi256_ps(a), _mm256_setr_epi32(0, 2, 1, 3, 1, 3, 0, 2)); - return _mm_castps_si128(_mm_blend_ps(_mm256_extractf128_ps(sc, 0), _mm256_extractf128_ps(sc, 1), 0b1100)); -#endif - } - - template<> - EIGEN_STRONG_INLINE Packet8i pseti64(uint64_t a) - { - return _mm256_set1_epi64x(a); - } - - template<> - EIGEN_STRONG_INLINE Packet8i padd64(const Packet8i& a, const Packet8i& b) - { -#ifdef EIGEN_VECTORIZE_AVX2 - return _mm256_add_epi64(a, b); -#else - Packet4i a1, a2, b1, b2; - split_two(a, a1, a2); - split_two(b, b1, b2); - return combine_two((Packet4i)_mm_add_epi64(a1, b1), (Packet4i)_mm_add_epi64(a2, b2)); -#endif - } - - template<> - EIGEN_STRONG_INLINE Packet8i psub64(const Packet8i& a, const Packet8i& b) - { -#ifdef EIGEN_VECTORIZE_AVX2 - return _mm256_sub_epi64(a, b); -#else - Packet4i a1, a2, b1, b2; - split_two(a, a1, a2); - split_two(b, b1, b2); - return combine_two((Packet4i)_mm_sub_epi64(a1, b1), (Packet4i)_mm_sub_epi64(a2, b2)); -#endif - } - - template<> - EIGEN_STRONG_INLINE Packet8i pcmpeq(const Packet8i& a, const Packet8i& b) - { -#ifdef EIGEN_VECTORIZE_AVX2 - return _mm256_cmpeq_epi32(a, b); -#else - Packet4i a1, a2, b1, b2; - split_two(a, a1, a2); - split_two(b, b1, b2); - return combine_two((Packet4i)_mm_cmpeq_epi32(a1, b1), (Packet4i)_mm_cmpeq_epi32(a2, b2)); -#endif - } - - template<> - EIGEN_STRONG_INLINE Packet8i psll(const Packet8i& a, int b) - { -#ifdef EIGEN_VECTORIZE_AVX2 - return _mm256_slli_epi32(a, b); -#else - Packet4i a1, a2; - split_two(a, a1, a2); - return combine_two((Packet4i)_mm_slli_epi32(a1, b), (Packet4i)_mm_slli_epi32(a2, b)); -#endif - } - - template<> - EIGEN_STRONG_INLINE Packet8i psrl(const Packet8i& a, int b) - { -#ifdef EIGEN_VECTORIZE_AVX2 - return _mm256_srli_epi32(a, b); -#else - Packet4i a1, a2; - split_two(a, a1, a2); - return combine_two((Packet4i)_mm_srli_epi32(a1, b), (Packet4i)_mm_srli_epi32(a2, b)); -#endif - } - - template<> - EIGEN_STRONG_INLINE Packet8i psll64(const Packet8i& a, int b) - { -#ifdef EIGEN_VECTORIZE_AVX2 - return _mm256_slli_epi64(a, b); -#else - Packet4i a1, a2; - split_two(a, a1, a2); - return combine_two((Packet4i)_mm_slli_epi64(a1, b), (Packet4i)_mm_slli_epi64(a2, b)); -#endif - } - - template<> - EIGEN_STRONG_INLINE Packet8i psrl64(const Packet8i& a, int b) - { -#ifdef EIGEN_VECTORIZE_AVX2 - return _mm256_srli_epi64(a, b); -#else - Packet4i a1, a2; - 
split_two(a, a1, a2); - return combine_two((Packet4i)_mm_srli_epi64(a1, b), (Packet4i)_mm_srli_epi64(a2, b)); -#endif - } - - template<> EIGEN_STRONG_INLINE Packet8i padd(const Packet8i& a, const Packet8i& b) - { -#ifdef EIGEN_VECTORIZE_AVX2 - return _mm256_add_epi32(a, b); -#else - Packet4i a1, a2, b1, b2; - split_two(a, a1, a2); - split_two(b, b1, b2); - return combine_two((Packet4i)_mm_add_epi32(a1, b1), (Packet4i)_mm_add_epi32(a2, b2)); -#endif - } - - template<> EIGEN_STRONG_INLINE Packet8i psub(const Packet8i& a, const Packet8i& b) - { -#ifdef EIGEN_VECTORIZE_AVX2 - return _mm256_sub_epi32(a, b); -#else - Packet4i a1, a2, b1, b2; - split_two(a, a1, a2); - split_two(b, b1, b2); - return combine_two((Packet4i)_mm_sub_epi32(a1, b1), (Packet4i)_mm_sub_epi32(a2, b2)); -#endif - } - - template<> EIGEN_STRONG_INLINE Packet8i pand(const Packet8i& a, const Packet8i& b) - { -#ifdef EIGEN_VECTORIZE_AVX2 - return _mm256_and_si256(a, b); -#else - return reinterpret_to_int((Packet8f)_mm256_and_ps(reinterpret_to_float(a), reinterpret_to_float(b))); -#endif - } - - template<> EIGEN_STRONG_INLINE Packet8i pandnot(const Packet8i& a, const Packet8i& b) - { -#ifdef EIGEN_VECTORIZE_AVX2 - return _mm256_andnot_si256(a, b); -#else - return reinterpret_to_int((Packet8f)_mm256_andnot_ps(reinterpret_to_float(a), reinterpret_to_float(b))); -#endif - } - - template<> EIGEN_STRONG_INLINE Packet8i por(const Packet8i& a, const Packet8i& b) - { -#ifdef EIGEN_VECTORIZE_AVX2 - return _mm256_or_si256(a, b); -#else - return reinterpret_to_int((Packet8f)_mm256_or_ps(reinterpret_to_float(a), reinterpret_to_float(b))); -#endif - } - - template<> EIGEN_STRONG_INLINE Packet8i pxor(const Packet8i& a, const Packet8i& b) - { -#ifdef EIGEN_VECTORIZE_AVX2 - return _mm256_xor_si256(a, b); -#else - return reinterpret_to_int((Packet8f)_mm256_xor_ps(reinterpret_to_float(a), reinterpret_to_float(b))); -#endif - } - - template<> - EIGEN_STRONG_INLINE Packet8i pcmplt(const Packet8i& a, const Packet8i& b) - { -#ifdef EIGEN_VECTORIZE_AVX2 - return _mm256_cmpgt_epi32(b, a); -#else - Packet4i a1, a2, b1, b2; - split_two(a, a1, a2); - split_two(b, b1, b2); - return combine_two((Packet4i)_mm_cmpgt_epi32(b1, a1), (Packet4i)_mm_cmpgt_epi32(b2, a2)); -#endif - } - - template<> - EIGEN_STRONG_INLINE Packet8i pcmplt64(const Packet8i& a, const Packet8i& b) - { -#ifdef EIGEN_VECTORIZE_AVX2 - return _mm256_cmpgt_epi64(b, a); -#else - Packet4i a1, a2, b1, b2; - split_two(a, a1, a2); - split_two(b, b1, b2); - return combine_two((Packet4i)_mm_cmpgt_epi64(b1, a1), (Packet4i)_mm_cmpgt_epi64(b2, a2)); -#endif - } - - template<> - EIGEN_STRONG_INLINE Packet8f pcmplt(const Packet8f& a, const Packet8f& b) - { - return _mm256_cmp_ps(a, b, _CMP_LT_OQ); - } - - template<> - EIGEN_STRONG_INLINE Packet8f pcmple(const Packet8f& a, const Packet8f& b) - { - return _mm256_cmp_ps(a, b, _CMP_LE_OQ); - } - - template<> - EIGEN_STRONG_INLINE Packet4d pcmplt(const Packet4d& a, const Packet4d& b) - { - return _mm256_cmp_pd(a, b, _CMP_LT_OQ); - } - - template<> - EIGEN_STRONG_INLINE Packet4d pcmple(const Packet4d& a, const Packet4d& b) - { - return _mm256_cmp_pd(a, b, _CMP_LE_OQ); - } - - template<> - EIGEN_STRONG_INLINE Packet8f pblendv(const Packet8f& ifPacket, const Packet8f& thenPacket, const Packet8f& elsePacket) - { - return _mm256_blendv_ps(elsePacket, thenPacket, ifPacket); - } - - template<> - EIGEN_STRONG_INLINE Packet8f pblendv(const Packet8i& ifPacket, const Packet8f& thenPacket, const Packet8f& elsePacket) - { - return pblendv(_mm256_castsi256_ps(ifPacket), 
thenPacket, elsePacket); - } - - template<> - EIGEN_STRONG_INLINE Packet8i pblendv(const Packet8i& ifPacket, const Packet8i& thenPacket, const Packet8i& elsePacket) - { - return _mm256_castps_si256(_mm256_blendv_ps( - _mm256_castsi256_ps(elsePacket), - _mm256_castsi256_ps(thenPacket), - _mm256_castsi256_ps(ifPacket) - )); - } - - template<> - EIGEN_STRONG_INLINE Packet4d pblendv(const Packet4d& ifPacket, const Packet4d& thenPacket, const Packet4d& elsePacket) - { - return _mm256_blendv_pd(elsePacket, thenPacket, ifPacket); - } - - template<> - EIGEN_STRONG_INLINE Packet4d pblendv(const Packet8i& ifPacket, const Packet4d& thenPacket, const Packet4d& elsePacket) - { - return pblendv(_mm256_castsi256_pd(ifPacket), thenPacket, elsePacket); - } - - template<> - EIGEN_STRONG_INLINE Packet8i pgather(const int* addr, const Packet8i& index) - { -#ifdef EIGEN_VECTORIZE_AVX2 - return _mm256_i32gather_epi32(addr, index, 4); -#else - uint32_t u[8]; - _mm256_storeu_si256((Packet8i*)u, index); - return _mm256_setr_epi32(addr[u[0]], addr[u[1]], addr[u[2]], addr[u[3]], - addr[u[4]], addr[u[5]], addr[u[6]], addr[u[7]]); -#endif - } - - template<> - EIGEN_STRONG_INLINE Packet8f pgather(const float *addr, const Packet8i& index) - { -#ifdef EIGEN_VECTORIZE_AVX2 - return _mm256_i32gather_ps(addr, index, 4); -#else - uint32_t u[8]; - _mm256_storeu_si256((Packet8i*)u, index); - return _mm256_setr_ps(addr[u[0]], addr[u[1]], addr[u[2]], addr[u[3]], - addr[u[4]], addr[u[5]], addr[u[6]], addr[u[7]]); -#endif - } - - template<> - EIGEN_STRONG_INLINE Packet4d pgather(const double *addr, const Packet8i& index, bool upperhalf) - { -#ifdef EIGEN_VECTORIZE_AVX2 - return _mm256_i32gather_pd(addr, _mm256_castsi256_si128(index), 8); -#else - uint32_t u[8]; - _mm256_storeu_si256((Packet8i*)u, index); - if (upperhalf) - { - return _mm256_setr_pd(addr[u[4]], addr[u[5]], addr[u[6]], addr[u[7]]); - } - else - { - return _mm256_setr_pd(addr[u[0]], addr[u[1]], addr[u[2]], addr[u[3]]); - } -#endif - } - - template<> - EIGEN_STRONG_INLINE int pmovemask(const Packet8f& a) - { - return _mm256_movemask_ps(a); - } - - template<> - EIGEN_STRONG_INLINE int pmovemask(const Packet4d& a) - { - return _mm256_movemask_pd(a); - } - - template<> - EIGEN_STRONG_INLINE int pmovemask(const Packet8i& a) - { - return pmovemask(_mm256_castsi256_ps(a)); - } - - template<> - EIGEN_STRONG_INLINE Packet8f ptruncate(const Packet8f& a) - { - return _mm256_round_ps(a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); - } - - template<> - EIGEN_STRONG_INLINE Packet4d ptruncate(const Packet4d& a) - { - return _mm256_round_pd(a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); - } - - template<> - EIGEN_STRONG_INLINE Packet8i pcmpeq64(const Packet8i& a, const Packet8i& b) - { -#ifdef EIGEN_VECTORIZE_AVX2 - return _mm256_cmpeq_epi64(a, b); -#else - Packet4i a1, a2, b1, b2; - split_two(a, a1, a2); - split_two(b, b1, b2); - return combine_two((Packet4i)_mm_cmpeq_epi64(a1, b1), (Packet4i)_mm_cmpeq_epi64(a2, b2)); -#endif - } - - template<> - EIGEN_STRONG_INLINE Packet8i pmuluadd64(const Packet8i& a, uint64_t b, uint64_t c) - { - uint64_t u[4]; - _mm256_storeu_si256((__m256i*)u, a); - u[0] = u[0] * b + c; - u[1] = u[1] * b + c; - u[2] = u[2] * b + c; - u[3] = u[3] * b + c; - return _mm256_loadu_si256((__m256i*)u); - } - - EIGEN_STRONG_INLINE __m256d uint64_to_double(__m256i x) { - auto y = _mm256_or_pd(_mm256_castsi256_pd(x), _mm256_set1_pd(0x0010000000000000)); - return _mm256_sub_pd(y, _mm256_set1_pd(0x0010000000000000)); - } - - EIGEN_STRONG_INLINE __m256d int64_to_double(__m256i 
x) { - x = padd64(x, _mm256_castpd_si256(_mm256_set1_pd(0x0018000000000000))); - return _mm256_sub_pd(_mm256_castsi256_pd(x), _mm256_set1_pd(0x0018000000000000)); - } - - EIGEN_STRONG_INLINE __m256i double_to_int64(__m256d x) { - x = _mm256_add_pd(x, _mm256_set1_pd(0x0018000000000000)); - return psub64( - _mm256_castpd_si256(x), - _mm256_castpd_si256(_mm256_set1_pd(0x0018000000000000)) - ); - } - - template<> - EIGEN_STRONG_INLINE Packet8i pcast64(const Packet4d& a) - { - return double_to_int64(a); - } - - template<> - EIGEN_STRONG_INLINE Packet4d pcast64(const Packet8i& a) - { - return int64_to_double(a); - } - - template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED - Packet4d psin(const Packet4d& x) - { - return _psin(x); - } - - template <> - EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4d - plog(const Packet4d& _x) { - Packet4d x = _x; - _EIGEN_DECLARE_CONST_Packet4d(1, 1.0); - _EIGEN_DECLARE_CONST_Packet4d(half, 0.5); - - auto inv_mant_mask = _mm256_castsi256_pd(pseti64(~0x7ff0000000000000)); - auto min_norm_pos = _mm256_castsi256_pd(pseti64(0x10000000000000)); - auto minus_inf = _mm256_castsi256_pd(pseti64(0xfff0000000000000)); - - // Polynomial coefficients. - _EIGEN_DECLARE_CONST_Packet4d(cephes_SQRTHF, 0.707106781186547524); - _EIGEN_DECLARE_CONST_Packet4d(cephes_log_p0, 7.0376836292E-2); - _EIGEN_DECLARE_CONST_Packet4d(cephes_log_p1, -1.1514610310E-1); - _EIGEN_DECLARE_CONST_Packet4d(cephes_log_p2, 1.1676998740E-1); - _EIGEN_DECLARE_CONST_Packet4d(cephes_log_p3, -1.2420140846E-1); - _EIGEN_DECLARE_CONST_Packet4d(cephes_log_p4, +1.4249322787E-1); - _EIGEN_DECLARE_CONST_Packet4d(cephes_log_p5, -1.6668057665E-1); - _EIGEN_DECLARE_CONST_Packet4d(cephes_log_p6, +2.0000714765E-1); - _EIGEN_DECLARE_CONST_Packet4d(cephes_log_p7, -2.4999993993E-1); - _EIGEN_DECLARE_CONST_Packet4d(cephes_log_p8, +3.3333331174E-1); - _EIGEN_DECLARE_CONST_Packet4d(cephes_log_q1, -2.12194440e-4); - _EIGEN_DECLARE_CONST_Packet4d(cephes_log_q2, 0.693359375); - - Packet4d invalid_mask = _mm256_cmp_pd(x, _mm256_setzero_pd(), _CMP_NGE_UQ); // not greater equal is true if x is NaN - Packet4d iszero_mask = _mm256_cmp_pd(x, _mm256_setzero_pd(), _CMP_EQ_OQ); - - // Truncate input values to the minimum positive normal. - x = pmax(x, min_norm_pos); - - Packet4d emm0 = uint64_to_double(psrl64(_mm256_castpd_si256(x), 52)); - Packet4d e = psub(emm0, pset1(1022)); - - // Set the exponents to -1, i.e. x are in the range [0.5,1). - x = _mm256_and_pd(x, inv_mant_mask); - x = _mm256_or_pd(x, p4d_half); - - // part2: Shift the inputs from the range [0.5,1) to [sqrt(1/2),sqrt(2)) - // and shift by -1. The values are then centered around 0, which improves - // the stability of the polynomial evaluation. - // if( x < SQRTHF ) { - // e -= 1; - // x = x + x - 1.0; - // } else { x = x - 1.0; } - Packet4d mask = _mm256_cmp_pd(x, p4d_cephes_SQRTHF, _CMP_LT_OQ); - Packet4d tmp = _mm256_and_pd(x, mask); - x = psub(x, p4d_1); - e = psub(e, _mm256_and_pd(p4d_1, mask)); - x = padd(x, tmp); - - Packet4d x2 = pmul(x, x); - Packet4d x3 = pmul(x2, x); - - // Evaluate the polynomial approximant of degree 8 in three parts, probably - // to improve instruction-level parallelism. 
- Packet4d y, y1, y2; - y = pmadd(p4d_cephes_log_p0, x, p4d_cephes_log_p1); - y1 = pmadd(p4d_cephes_log_p3, x, p4d_cephes_log_p4); - y2 = pmadd(p4d_cephes_log_p6, x, p4d_cephes_log_p7); - y = pmadd(y, x, p4d_cephes_log_p2); - y1 = pmadd(y1, x, p4d_cephes_log_p5); - y2 = pmadd(y2, x, p4d_cephes_log_p8); - y = pmadd(y, x3, y1); - y = pmadd(y, x3, y2); - y = pmul(y, x3); - - // Add the logarithm of the exponent back to the result of the interpolation. - y1 = pmul(e, p4d_cephes_log_q1); - tmp = pmul(x2, p4d_half); - y = padd(y, y1); - x = psub(x, tmp); - y2 = pmul(e, p4d_cephes_log_q2); - x = padd(x, y); - x = padd(x, y2); - - // Filter out invalid inputs, i.e. negative arg will be NAN, 0 will be -INF. - return pblendv(iszero_mask, minus_inf, _mm256_or_pd(x, invalid_mask)); - } - -#if EIGEN_VERSION_AT_LEAST(3,3,5) -#else - template<> EIGEN_STRONG_INLINE Packet4f pcast(const Packet4i& a) { - return _mm_cvtepi32_ps(a); - } - - template<> EIGEN_STRONG_INLINE Packet4i pcast(const Packet4f& a) { - return _mm_cvttps_epi32(a); - } -#endif - } -} +#include "arch/AVX/MorePacketMath.h" #endif #ifdef EIGEN_VECTORIZE_SSE2 -#include - -namespace Eigen -{ - namespace internal - { - template<> - struct reinterpreter - { - EIGEN_STRONG_INLINE Packet4f to_float(const Packet4i& x) - { - return _mm_castsi128_ps(x); - } - - EIGEN_STRONG_INLINE Packet2d to_double(const Packet4i& x) - { - return _mm_castsi128_pd(x); - } - - EIGEN_STRONG_INLINE Packet4i to_int(const Packet4i& x) - { - return x; - } - }; - - template<> - struct reinterpreter - { - EIGEN_STRONG_INLINE Packet4f to_float(const Packet4f& x) - { - return x; - } - - EIGEN_STRONG_INLINE Packet2d to_double(const Packet4f& x) - { - return _mm_castps_pd(x); - } - - EIGEN_STRONG_INLINE Packet4i to_int(const Packet4f& x) - { - return _mm_castps_si128(x); - } - }; - - template<> - struct reinterpreter - { - EIGEN_STRONG_INLINE Packet4f to_float(const Packet2d& x) - { - return _mm_castpd_ps(x); - } - - EIGEN_STRONG_INLINE Packet2d to_double(const Packet2d& x) - { - return x; - } - - EIGEN_STRONG_INLINE Packet4i to_int(const Packet2d& x) - { - return _mm_castpd_si128(x); - } - }; - - EIGEN_STRONG_INLINE void split_two(const Packet4i& x, uint64_t& a, uint64_t& b) - { -#ifdef EIGEN_VECTORIZE_SSE4_1 - a = _mm_extract_epi64(x, 0); - b = _mm_extract_epi64(x, 1); -#else - uint64_t u[2]; - _mm_storeu_si128((__m128i*)u, x); - a = u[0]; - b = u[1]; +#include "arch/SSE/MorePacketMath.h" #endif - } - - EIGEN_STRONG_INLINE Packet4i combine_low32(const Packet4i& a, const Packet4i& b) - { - auto sa = _mm_shuffle_epi32(a, _MM_SHUFFLE(3, 1, 2, 0)); - auto sb = _mm_shuffle_epi32(b, _MM_SHUFFLE(2, 0, 3, 1)); - sa = _mm_and_si128(sa, _mm_setr_epi32(-1, -1, 0, 0)); - sb = _mm_and_si128(sb, _mm_setr_epi32(0, 0, -1, -1)); - return _mm_or_si128(sa, sb); - } - - template<> - EIGEN_STRONG_INLINE Packet4i pseti64(uint64_t a) - { - return _mm_set1_epi64x(a); - } - - template<> - EIGEN_STRONG_INLINE Packet4i padd64(const Packet4i& a, const Packet4i& b) - { - return _mm_add_epi64(a, b); - } - - template<> - EIGEN_STRONG_INLINE Packet4i psub64(const Packet4i& a, const Packet4i& b) - { - return _mm_sub_epi64(a, b); - } - - template<> - EIGEN_STRONG_INLINE Packet4i pcmpeq(const Packet4i& a, const Packet4i& b) - { - return _mm_cmpeq_epi32(a, b); - } - - template<> - EIGEN_STRONG_INLINE Packet4i psll(const Packet4i& a, int b) - { - return _mm_slli_epi32(a, b); - } - - template<> - EIGEN_STRONG_INLINE Packet4i psrl(const Packet4i& a, int b) - { - return _mm_srli_epi32(a, b); - } - - template<> - 
EIGEN_STRONG_INLINE Packet4i psll64(const Packet4i& a, int b) - { - return _mm_slli_epi64(a, b); - } - - template<> - EIGEN_STRONG_INLINE Packet4i psrl64(const Packet4i& a, int b) - { - return _mm_srli_epi64(a, b); - } - - template<> - EIGEN_STRONG_INLINE Packet4i pcmplt(const Packet4i& a, const Packet4i& b) - { - return _mm_cmplt_epi32(a, b); - } - - template<> - EIGEN_STRONG_INLINE Packet4i pcmplt64(const Packet4i& a, const Packet4i& b) - { -#ifdef EIGEN_VECTORIZE_SSE4_2 - return _mm_cmpgt_epi64(b, a); -#else - int64_t u[2], v[2]; - _mm_storeu_si128((__m128i*)u, a); - _mm_storeu_si128((__m128i*)v, b); - return _mm_set_epi64x(u[1] < v[1] ? -1 : 0, u[0] < v[0] ? -1 : 0); -#endif - } - - template<> - EIGEN_STRONG_INLINE Packet4f pcmplt(const Packet4f& a, const Packet4f& b) - { - return _mm_cmplt_ps(a, b); - } - - template<> - EIGEN_STRONG_INLINE Packet4f pcmple(const Packet4f& a, const Packet4f& b) - { - return _mm_cmple_ps(a, b); - } - - template<> - EIGEN_STRONG_INLINE Packet2d pcmplt(const Packet2d& a, const Packet2d& b) - { - return _mm_cmplt_pd(a, b); - } - - template<> - EIGEN_STRONG_INLINE Packet2d pcmple(const Packet2d& a, const Packet2d& b) - { - return _mm_cmple_pd(a, b); - } - - template<> - EIGEN_STRONG_INLINE Packet4f pblendv(const Packet4f& ifPacket, const Packet4f& thenPacket, const Packet4f& elsePacket) - { -#ifdef EIGEN_VECTORIZE_SSE4_1 - return _mm_blendv_ps(elsePacket, thenPacket, ifPacket); -#else - return _mm_or_ps(_mm_and_ps(ifPacket, thenPacket), _mm_andnot_ps(ifPacket, elsePacket)); -#endif - } - - template<> - EIGEN_STRONG_INLINE Packet4f pblendv(const Packet4i& ifPacket, const Packet4f& thenPacket, const Packet4f& elsePacket) - { - return pblendv(_mm_castsi128_ps(ifPacket), thenPacket, elsePacket); - } - - template<> - EIGEN_STRONG_INLINE Packet4i pblendv(const Packet4i& ifPacket, const Packet4i& thenPacket, const Packet4i& elsePacket) - { -#ifdef EIGEN_VECTORIZE_SSE4_1 - return _mm_castps_si128(_mm_blendv_ps(_mm_castsi128_ps(elsePacket), _mm_castsi128_ps(thenPacket), _mm_castsi128_ps(ifPacket))); -#else - return _mm_or_si128(_mm_and_si128(ifPacket, thenPacket), _mm_andnot_si128(ifPacket, elsePacket)); +#ifdef EIGEN_VECTORIZE_NEON +#include "arch/NEON/MorePacketMath.h" #endif - } - template<> - EIGEN_STRONG_INLINE Packet2d pblendv(const Packet2d& ifPacket, const Packet2d& thenPacket, const Packet2d& elsePacket) - { -#ifdef EIGEN_VECTORIZE_SSE4_1 - return _mm_blendv_pd(elsePacket, thenPacket, ifPacket); -#else - return _mm_or_pd(_mm_and_pd(ifPacket, thenPacket), _mm_andnot_pd(ifPacket, elsePacket)); #endif - } - - template<> - EIGEN_STRONG_INLINE Packet2d pblendv(const Packet4i& ifPacket, const Packet2d& thenPacket, const Packet2d& elsePacket) - { - return pblendv(_mm_castsi128_pd(ifPacket), thenPacket, elsePacket); - } - - template<> - EIGEN_STRONG_INLINE Packet4i pgather(const int* addr, const Packet4i& index) - { -#ifdef EIGEN_VECTORIZE_AVX2 - return _mm_i32gather_epi32(addr, index, 4); -#else - uint32_t u[4]; - _mm_storeu_si128((__m128i*)u, index); - return _mm_setr_epi32(addr[u[0]], addr[u[1]], addr[u[2]], addr[u[3]]); -#endif - } - - template<> - EIGEN_STRONG_INLINE Packet4f pgather(const float* addr, const Packet4i& index) - { -#ifdef EIGEN_VECTORIZE_AVX2 - return _mm_i32gather_ps(addr, index, 4); -#else - uint32_t u[4]; - _mm_storeu_si128((__m128i*)u, index); - return _mm_setr_ps(addr[u[0]], addr[u[1]], addr[u[2]], addr[u[3]]); -#endif - } - - template<> - EIGEN_STRONG_INLINE Packet2d pgather(const double* addr, const Packet4i& index, bool upperhalf) - { 
-#ifdef EIGEN_VECTORIZE_AVX2 - return _mm_i32gather_pd(addr, index, 8); -#else - uint32_t u[4]; - _mm_storeu_si128((__m128i*)u, index); - if (upperhalf) - { - return _mm_setr_pd(addr[u[2]], addr[u[3]]); - } - else - { - return _mm_setr_pd(addr[u[0]], addr[u[1]]); - } -#endif - } - - template<> - EIGEN_STRONG_INLINE int pmovemask(const Packet4f& a) - { - return _mm_movemask_ps(a); - } - - template<> - EIGEN_STRONG_INLINE int pmovemask(const Packet2d& a) - { - return _mm_movemask_pd(a); - } - - template<> - EIGEN_STRONG_INLINE int pmovemask(const Packet4i& a) - { - return pmovemask((Packet4f)_mm_castsi128_ps(a)); - } - - template<> - EIGEN_STRONG_INLINE Packet4f ptruncate(const Packet4f& a) - { -#ifdef EIGEN_VECTORIZE_SSE4_1 - return _mm_round_ps(a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -#else - auto round = _MM_GET_ROUNDING_MODE(); - _MM_SET_ROUNDING_MODE(_MM_ROUND_TOWARD_ZERO); - auto ret = _mm_cvtepi32_ps(_mm_cvtps_epi32(a)); - _MM_SET_ROUNDING_MODE(round); - return ret; -#endif - } - - template<> - EIGEN_STRONG_INLINE Packet2d ptruncate(const Packet2d& a) - { -#ifdef EIGEN_VECTORIZE_SSE4_1 - return _mm_round_pd(a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -#else - auto round = _MM_GET_ROUNDING_MODE(); - _MM_SET_ROUNDING_MODE(_MM_ROUND_TOWARD_ZERO); - auto ret = _mm_cvtepi32_pd(_mm_cvtpd_epi32(a)); - _MM_SET_ROUNDING_MODE(round); - return ret; -#endif - } - - template<> - EIGEN_STRONG_INLINE Packet4i pcmpeq64(const Packet4i& a, const Packet4i& b) - { -#ifdef EIGEN_VECTORIZE_SSE4_1 - return _mm_cmpeq_epi64(a, b); -#else - Packet4i c = _mm_cmpeq_epi32(a, b); - return pand(c, (Packet4i)_mm_shuffle_epi32(c, _MM_SHUFFLE(2, 3, 0, 1))); -#endif - } - - template<> - EIGEN_STRONG_INLINE Packet4i pmuluadd64(const Packet4i& a, uint64_t b, uint64_t c) - { - uint64_t u[2]; - _mm_storeu_si128((__m128i*)u, a); - u[0] = u[0] * b + c; - u[1] = u[1] * b + c; - return _mm_loadu_si128((__m128i*)u); - } - - EIGEN_STRONG_INLINE __m128d uint64_to_double(__m128i x) { - x = _mm_or_si128(x, _mm_castpd_si128(_mm_set1_pd(0x0010000000000000))); - return _mm_sub_pd(_mm_castsi128_pd(x), _mm_set1_pd(0x0010000000000000)); - } - - EIGEN_STRONG_INLINE __m128d int64_to_double(__m128i x) { - x = _mm_add_epi64(x, _mm_castpd_si128(_mm_set1_pd(0x0018000000000000))); - return _mm_sub_pd(_mm_castsi128_pd(x), _mm_set1_pd(0x0018000000000000)); - } - - EIGEN_STRONG_INLINE __m128i double_to_int64(__m128d x) { - x = _mm_add_pd(x, _mm_set1_pd(0x0018000000000000)); - return _mm_sub_epi64( - _mm_castpd_si128(x), - _mm_castpd_si128(_mm_set1_pd(0x0018000000000000)) - ); - } - - template<> - EIGEN_STRONG_INLINE Packet4i pcast64(const Packet2d& a) - { - return double_to_int64(a); - } - - template<> - EIGEN_STRONG_INLINE Packet2d pcast64(const Packet4i& a) - { - return int64_to_double(a); - } - - template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED - Packet2d psin(const Packet2d& x) - { - return _psin(x); - } - - template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED - Packet2d plog(const Packet2d& _x) - { - Packet2d x = _x; - _EIGEN_DECLARE_CONST_Packet2d(1, 1.0f); - _EIGEN_DECLARE_CONST_Packet2d(half, 0.5f); - _EIGEN_DECLARE_CONST_Packet4i(0x7f, 0x7f); - - auto inv_mant_mask = _mm_castsi128_pd(pseti64(~0x7ff0000000000000)); - auto min_norm_pos = _mm_castsi128_pd(pseti64(0x10000000000000)); - auto minus_inf = _mm_castsi128_pd(pseti64(0xfff0000000000000)); - - /* natural logarithm computed for 4 simultaneous float - return NaN for x <= 0 - */ - _EIGEN_DECLARE_CONST_Packet2d(cephes_SQRTHF, 
0.707106781186547524); - _EIGEN_DECLARE_CONST_Packet2d(cephes_log_p0, 7.0376836292E-2); - _EIGEN_DECLARE_CONST_Packet2d(cephes_log_p1, -1.1514610310E-1); - _EIGEN_DECLARE_CONST_Packet2d(cephes_log_p2, 1.1676998740E-1); - _EIGEN_DECLARE_CONST_Packet2d(cephes_log_p3, -1.2420140846E-1); - _EIGEN_DECLARE_CONST_Packet2d(cephes_log_p4, +1.4249322787E-1); - _EIGEN_DECLARE_CONST_Packet2d(cephes_log_p5, -1.6668057665E-1); - _EIGEN_DECLARE_CONST_Packet2d(cephes_log_p6, +2.0000714765E-1); - _EIGEN_DECLARE_CONST_Packet2d(cephes_log_p7, -2.4999993993E-1); - _EIGEN_DECLARE_CONST_Packet2d(cephes_log_p8, +3.3333331174E-1); - _EIGEN_DECLARE_CONST_Packet2d(cephes_log_q1, -2.12194440e-4); - _EIGEN_DECLARE_CONST_Packet2d(cephes_log_q2, 0.693359375); - - - Packet4i emm0; - - Packet2d invalid_mask = _mm_cmpnge_pd(x, _mm_setzero_pd()); // not greater equal is true if x is NaN - Packet2d iszero_mask = _mm_cmpeq_pd(x, _mm_setzero_pd()); - - x = pmax(x, min_norm_pos); /* cut off denormalized stuff */ - emm0 = _mm_srli_epi64(_mm_castpd_si128(x), 52); - - /* keep only the fractional part */ - x = _mm_and_pd(x, inv_mant_mask); - x = _mm_or_pd(x, p2d_half); - - Packet2d e = _mm_sub_pd(uint64_to_double(emm0), pset1(1022)); - - /* part2: - if( x < SQRTHF ) { - e -= 1; - x = x + x - 1.0; - } else { x = x - 1.0; } - */ - Packet2d mask = _mm_cmplt_pd(x, p2d_cephes_SQRTHF); - Packet2d tmp = pand(x, mask); - x = psub(x, p2d_1); - e = psub(e, pand(p2d_1, mask)); - x = padd(x, tmp); - - Packet2d x2 = pmul(x, x); - Packet2d x3 = pmul(x2, x); - - Packet2d y, y1, y2; - y = pmadd(p2d_cephes_log_p0, x, p2d_cephes_log_p1); - y1 = pmadd(p2d_cephes_log_p3, x, p2d_cephes_log_p4); - y2 = pmadd(p2d_cephes_log_p6, x, p2d_cephes_log_p7); - y = pmadd(y, x, p2d_cephes_log_p2); - y1 = pmadd(y1, x, p2d_cephes_log_p5); - y2 = pmadd(y2, x, p2d_cephes_log_p8); - y = pmadd(y, x3, y1); - y = pmadd(y, x3, y2); - y = pmul(y, x3); - - y1 = pmul(e, p2d_cephes_log_q1); - tmp = pmul(x2, p2d_half); - y = padd(y, y1); - x = psub(x, tmp); - y2 = pmul(e, p2d_cephes_log_q2); - x = padd(x, y); - x = padd(x, y2); - // negative arg will be NAN, 0 will be -INF - return pblendv(iszero_mask, minus_inf, _mm_or_pd(x, invalid_mask)); - } - } -} -#endif - -#endif diff --git a/EigenRand/PacketFilter.h b/EigenRand/PacketFilter.h index 692bb31..bf6093d 100644 --- a/EigenRand/PacketFilter.h +++ b/EigenRand/PacketFilter.h @@ -13,7 +13,7 @@ #define EIGENRAND_PACKET_FILTER_H #include -#include +#include "MorePacketMath.h" namespace Eigen { diff --git a/EigenRand/PacketRandomEngine.h b/EigenRand/PacketRandomEngine.h index 055f0a7..20bf9c6 100644 --- a/EigenRand/PacketRandomEngine.h +++ b/EigenRand/PacketRandomEngine.h @@ -15,7 +15,7 @@ #include #include #include -#include +#include "MorePacketMath.h" #include namespace Eigen @@ -537,7 +537,7 @@ namespace Eigen float uniform_real() { - internal::bit_scalar bs; + internal::BitScalar bs; return bs.to_ur(this->operator()()); } }; diff --git a/EigenRand/RandUtils.h b/EigenRand/RandUtils.h index 155a5a2..afca40c 100644 --- a/EigenRand/RandUtils.h +++ b/EigenRand/RandUtils.h @@ -12,9 +12,9 @@ #ifndef EIGENRAND_RAND_UTILS_H #define EIGENRAND_RAND_UTILS_H -#include -#include -#include +#include "MorePacketMath.h" +#include "PacketFilter.h" +#include "PacketRandomEngine.h" namespace Eigen { @@ -38,6 +38,12 @@ namespace Eigen return psub(pmul(this->zero_to_one(rng), pset1(2)), pset1(1)); } + template + EIGEN_STRONG_INLINE PacketType balanced(Rng& rng, Scalar slope, Scalar bias) + { + return padd(pmul(this->zero_to_one(rng), pset1(slope)), 
pset1(bias)); + } + EIGEN_STRONG_INLINE PacketType nonzero_uniform_real(Rng& rng) { constexpr auto epsilon = std::numeric_limits::type>::epsilon() / 8; diff --git a/EigenRand/arch/AVX/MorePacketMath.h b/EigenRand/arch/AVX/MorePacketMath.h new file mode 100644 index 0000000..ce48db8 --- /dev/null +++ b/EigenRand/arch/AVX/MorePacketMath.h @@ -0,0 +1,601 @@ +/** + * @file MorePacketMath.h + * @author bab2min (bab2min@gmail.com) + * @brief + * @version 0.3.5 + * @date 2021-07-16 + * + * @copyright Copyright (c) 2020-2021 + * + */ + +#ifndef EIGENRAND_MORE_PACKET_MATH_AVX_H +#define EIGENRAND_MORE_PACKET_MATH_AVX_H + +#include + +namespace Eigen +{ + namespace internal + { +#ifdef EIGEN_VECTORIZE_AVX2 + template<> + struct IsIntPacket : std::true_type {}; + + template<> + struct HalfPacket + { + using type = Packet4i; + }; + + template<> + struct HalfPacket + { + using type = Packet4f; + }; +#endif + template<> + struct IsFloatPacket : std::true_type {}; + + template<> + struct IsDoublePacket : std::true_type {}; + + template<> + struct reinterpreter + { + EIGEN_STRONG_INLINE Packet8f to_float(const Packet8i& x) + { + return _mm256_castsi256_ps(x); + } + + EIGEN_STRONG_INLINE Packet4d to_double(const Packet8i& x) + { + return _mm256_castsi256_pd(x); + } + + EIGEN_STRONG_INLINE Packet8i to_int(const Packet8i& x) + { + return x; + } + }; + + template<> + struct reinterpreter + { + EIGEN_STRONG_INLINE Packet8f to_float(const Packet8f& x) + { + return x; + } + + EIGEN_STRONG_INLINE Packet4d to_double(const Packet8f& x) + { + return _mm256_castps_pd(x); + } + + EIGEN_STRONG_INLINE Packet8i to_int(const Packet8f& x) + { + return _mm256_castps_si256(x); + } + }; + + template<> + struct reinterpreter + { + EIGEN_STRONG_INLINE Packet8f to_float(const Packet4d& x) + { + return _mm256_castpd_ps(x); + } + + EIGEN_STRONG_INLINE Packet4d to_double(const Packet4d& x) + { + return x; + } + + EIGEN_STRONG_INLINE Packet8i to_int(const Packet4d& x) + { + return _mm256_castpd_si256(x); + } + }; + + template<> + EIGEN_STRONG_INLINE void split_two(const Packet8i& x, Packet4i& a, Packet4i& b) + { + a = _mm256_extractf128_si256(x, 0); + b = _mm256_extractf128_si256(x, 1); + } + + EIGEN_STRONG_INLINE Packet8i combine_two(const Packet4i& a, const Packet4i& b) + { + return _mm256_insertf128_si256(_mm256_castsi128_si256(a), b, 1); + } + + template<> + EIGEN_STRONG_INLINE void split_two(const Packet8f& x, Packet4f& a, Packet4f& b) + { + a = _mm256_extractf128_ps(x, 0); + b = _mm256_extractf128_ps(x, 1); + } + + EIGEN_STRONG_INLINE Packet8f combine_two(const Packet4f& a, const Packet4f& b) + { + return _mm256_insertf128_ps(_mm256_castps128_ps256(a), b, 1); + } + + + EIGEN_STRONG_INLINE Packet4i combine_low32(const Packet8i& a) + { +#ifdef EIGEN_VECTORIZE_AVX2 + return _mm256_castsi256_si128(_mm256_permutevar8x32_epi32(a, _mm256_setr_epi32(0, 2, 4, 6, 1, 3, 5, 7))); +#else + auto sc = _mm256_permutevar_ps(_mm256_castsi256_ps(a), _mm256_setr_epi32(0, 2, 1, 3, 1, 3, 0, 2)); + return _mm_castps_si128(_mm_blend_ps(_mm256_extractf128_ps(sc, 0), _mm256_extractf128_ps(sc, 1), 0b1100)); +#endif + } + + template<> + EIGEN_STRONG_INLINE Packet8i pseti64(uint64_t a) + { + return _mm256_set1_epi64x(a); + } + + template<> + EIGEN_STRONG_INLINE Packet8i padd64(const Packet8i& a, const Packet8i& b) + { +#ifdef EIGEN_VECTORIZE_AVX2 + return _mm256_add_epi64(a, b); +#else + Packet4i a1, a2, b1, b2; + split_two(a, a1, a2); + split_two(b, b1, b2); + return combine_two((Packet4i)_mm_add_epi64(a1, b1), (Packet4i)_mm_add_epi64(a2, b2)); +#endif + 
} + + template<> + EIGEN_STRONG_INLINE Packet8i psub64(const Packet8i& a, const Packet8i& b) + { +#ifdef EIGEN_VECTORIZE_AVX2 + return _mm256_sub_epi64(a, b); +#else + Packet4i a1, a2, b1, b2; + split_two(a, a1, a2); + split_two(b, b1, b2); + return combine_two((Packet4i)_mm_sub_epi64(a1, b1), (Packet4i)_mm_sub_epi64(a2, b2)); +#endif + } + + template<> + EIGEN_STRONG_INLINE Packet8i pcmpeq(const Packet8i& a, const Packet8i& b) + { +#ifdef EIGEN_VECTORIZE_AVX2 + return _mm256_cmpeq_epi32(a, b); +#else + Packet4i a1, a2, b1, b2; + split_two(a, a1, a2); + split_two(b, b1, b2); + return combine_two((Packet4i)_mm_cmpeq_epi32(a1, b1), (Packet4i)_mm_cmpeq_epi32(a2, b2)); +#endif + } + + template<> + EIGEN_STRONG_INLINE Packet8i psll(const Packet8i& a, int b) + { +#ifdef EIGEN_VECTORIZE_AVX2 + return _mm256_slli_epi32(a, b); +#else + Packet4i a1, a2; + split_two(a, a1, a2); + return combine_two((Packet4i)_mm_slli_epi32(a1, b), (Packet4i)_mm_slli_epi32(a2, b)); +#endif + } + + template<> + EIGEN_STRONG_INLINE Packet8i psrl(const Packet8i& a, int b) + { +#ifdef EIGEN_VECTORIZE_AVX2 + return _mm256_srli_epi32(a, b); +#else + Packet4i a1, a2; + split_two(a, a1, a2); + return combine_two((Packet4i)_mm_srli_epi32(a1, b), (Packet4i)_mm_srli_epi32(a2, b)); +#endif + } + + template<> + EIGEN_STRONG_INLINE Packet8i psll64(const Packet8i& a, int b) + { +#ifdef EIGEN_VECTORIZE_AVX2 + return _mm256_slli_epi64(a, b); +#else + Packet4i a1, a2; + split_two(a, a1, a2); + return combine_two((Packet4i)_mm_slli_epi64(a1, b), (Packet4i)_mm_slli_epi64(a2, b)); +#endif + } + + template<> + EIGEN_STRONG_INLINE Packet8i psrl64(const Packet8i& a, int b) + { +#ifdef EIGEN_VECTORIZE_AVX2 + return _mm256_srli_epi64(a, b); +#else + Packet4i a1, a2; + split_two(a, a1, a2); + return combine_two((Packet4i)_mm_srli_epi64(a1, b), (Packet4i)_mm_srli_epi64(a2, b)); +#endif + } + + template<> EIGEN_STRONG_INLINE Packet8i padd(const Packet8i& a, const Packet8i& b) + { +#ifdef EIGEN_VECTORIZE_AVX2 + return _mm256_add_epi32(a, b); +#else + Packet4i a1, a2, b1, b2; + split_two(a, a1, a2); + split_two(b, b1, b2); + return combine_two((Packet4i)_mm_add_epi32(a1, b1), (Packet4i)_mm_add_epi32(a2, b2)); +#endif + } + + template<> EIGEN_STRONG_INLINE Packet8i psub(const Packet8i& a, const Packet8i& b) + { +#ifdef EIGEN_VECTORIZE_AVX2 + return _mm256_sub_epi32(a, b); +#else + Packet4i a1, a2, b1, b2; + split_two(a, a1, a2); + split_two(b, b1, b2); + return combine_two((Packet4i)_mm_sub_epi32(a1, b1), (Packet4i)_mm_sub_epi32(a2, b2)); +#endif + } + + template<> EIGEN_STRONG_INLINE Packet8i pand(const Packet8i& a, const Packet8i& b) + { +#ifdef EIGEN_VECTORIZE_AVX2 + return _mm256_and_si256(a, b); +#else + return reinterpret_to_int((Packet8f)_mm256_and_ps(reinterpret_to_float(a), reinterpret_to_float(b))); +#endif + } + + template<> EIGEN_STRONG_INLINE Packet8i pandnot(const Packet8i& a, const Packet8i& b) + { +#ifdef EIGEN_VECTORIZE_AVX2 + return _mm256_andnot_si256(a, b); +#else + return reinterpret_to_int((Packet8f)_mm256_andnot_ps(reinterpret_to_float(a), reinterpret_to_float(b))); +#endif + } + + template<> EIGEN_STRONG_INLINE Packet8i por(const Packet8i& a, const Packet8i& b) + { +#ifdef EIGEN_VECTORIZE_AVX2 + return _mm256_or_si256(a, b); +#else + return reinterpret_to_int((Packet8f)_mm256_or_ps(reinterpret_to_float(a), reinterpret_to_float(b))); +#endif + } + + template<> EIGEN_STRONG_INLINE Packet8i pxor(const Packet8i& a, const Packet8i& b) + { +#ifdef EIGEN_VECTORIZE_AVX2 + return _mm256_xor_si256(a, b); +#else + return 
reinterpret_to_int((Packet8f)_mm256_xor_ps(reinterpret_to_float(a), reinterpret_to_float(b))); +#endif + } + + template<> + EIGEN_STRONG_INLINE Packet8i pcmplt(const Packet8i& a, const Packet8i& b) + { +#ifdef EIGEN_VECTORIZE_AVX2 + return _mm256_cmpgt_epi32(b, a); +#else + Packet4i a1, a2, b1, b2; + split_two(a, a1, a2); + split_two(b, b1, b2); + return combine_two((Packet4i)_mm_cmpgt_epi32(b1, a1), (Packet4i)_mm_cmpgt_epi32(b2, a2)); +#endif + } + + template<> + EIGEN_STRONG_INLINE Packet8i pcmplt64(const Packet8i& a, const Packet8i& b) + { +#ifdef EIGEN_VECTORIZE_AVX2 + return _mm256_cmpgt_epi64(b, a); +#else + Packet4i a1, a2, b1, b2; + split_two(a, a1, a2); + split_two(b, b1, b2); + return combine_two((Packet4i)_mm_cmpgt_epi64(b1, a1), (Packet4i)_mm_cmpgt_epi64(b2, a2)); +#endif + } + + template<> + EIGEN_STRONG_INLINE Packet8f pcmplt(const Packet8f& a, const Packet8f& b) + { + return _mm256_cmp_ps(a, b, _CMP_LT_OQ); + } + + template<> + EIGEN_STRONG_INLINE Packet8f pcmple(const Packet8f& a, const Packet8f& b) + { + return _mm256_cmp_ps(a, b, _CMP_LE_OQ); + } + + template<> + EIGEN_STRONG_INLINE Packet4d pcmplt(const Packet4d& a, const Packet4d& b) + { + return _mm256_cmp_pd(a, b, _CMP_LT_OQ); + } + + template<> + EIGEN_STRONG_INLINE Packet4d pcmple(const Packet4d& a, const Packet4d& b) + { + return _mm256_cmp_pd(a, b, _CMP_LE_OQ); + } + + template<> + EIGEN_STRONG_INLINE Packet8f pblendv(const Packet8f& ifPacket, const Packet8f& thenPacket, const Packet8f& elsePacket) + { + return _mm256_blendv_ps(elsePacket, thenPacket, ifPacket); + } + + template<> + EIGEN_STRONG_INLINE Packet8f pblendv(const Packet8i& ifPacket, const Packet8f& thenPacket, const Packet8f& elsePacket) + { + return pblendv(_mm256_castsi256_ps(ifPacket), thenPacket, elsePacket); + } + + template<> + EIGEN_STRONG_INLINE Packet8i pblendv(const Packet8i& ifPacket, const Packet8i& thenPacket, const Packet8i& elsePacket) + { + return _mm256_castps_si256(_mm256_blendv_ps( + _mm256_castsi256_ps(elsePacket), + _mm256_castsi256_ps(thenPacket), + _mm256_castsi256_ps(ifPacket) + )); + } + + template<> + EIGEN_STRONG_INLINE Packet4d pblendv(const Packet4d& ifPacket, const Packet4d& thenPacket, const Packet4d& elsePacket) + { + return _mm256_blendv_pd(elsePacket, thenPacket, ifPacket); + } + + template<> + EIGEN_STRONG_INLINE Packet4d pblendv(const Packet8i& ifPacket, const Packet4d& thenPacket, const Packet4d& elsePacket) + { + return pblendv(_mm256_castsi256_pd(ifPacket), thenPacket, elsePacket); + } + + template<> + EIGEN_STRONG_INLINE Packet8i pgather(const int* addr, const Packet8i& index) + { +#ifdef EIGEN_VECTORIZE_AVX2 + return _mm256_i32gather_epi32(addr, index, 4); +#else + uint32_t u[8]; + _mm256_storeu_si256((Packet8i*)u, index); + return _mm256_setr_epi32(addr[u[0]], addr[u[1]], addr[u[2]], addr[u[3]], + addr[u[4]], addr[u[5]], addr[u[6]], addr[u[7]]); +#endif + } + + template<> + EIGEN_STRONG_INLINE Packet8f pgather(const float* addr, const Packet8i& index) + { +#ifdef EIGEN_VECTORIZE_AVX2 + return _mm256_i32gather_ps(addr, index, 4); +#else + uint32_t u[8]; + _mm256_storeu_si256((Packet8i*)u, index); + return _mm256_setr_ps(addr[u[0]], addr[u[1]], addr[u[2]], addr[u[3]], + addr[u[4]], addr[u[5]], addr[u[6]], addr[u[7]]); +#endif + } + + template<> + EIGEN_STRONG_INLINE Packet4d pgather(const double* addr, const Packet8i& index, bool upperhalf) + { +#ifdef EIGEN_VECTORIZE_AVX2 + return _mm256_i32gather_pd(addr, _mm256_castsi256_si128(index), 8); +#else + uint32_t u[8]; + _mm256_storeu_si256((Packet8i*)u, index); + if 
(upperhalf) + { + return _mm256_setr_pd(addr[u[4]], addr[u[5]], addr[u[6]], addr[u[7]]); + } + else + { + return _mm256_setr_pd(addr[u[0]], addr[u[1]], addr[u[2]], addr[u[3]]); + } +#endif + } + + template<> + EIGEN_STRONG_INLINE int pmovemask(const Packet8f& a) + { + return _mm256_movemask_ps(a); + } + + template<> + EIGEN_STRONG_INLINE int pmovemask(const Packet4d& a) + { + return _mm256_movemask_pd(a); + } + + template<> + EIGEN_STRONG_INLINE int pmovemask(const Packet8i& a) + { + return pmovemask(_mm256_castsi256_ps(a)); + } + + template<> + EIGEN_STRONG_INLINE Packet8f ptruncate(const Packet8f& a) + { + return _mm256_round_ps(a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + } + + template<> + EIGEN_STRONG_INLINE Packet4d ptruncate(const Packet4d& a) + { + return _mm256_round_pd(a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + } + + template<> + EIGEN_STRONG_INLINE Packet8i pcmpeq64(const Packet8i& a, const Packet8i& b) + { +#ifdef EIGEN_VECTORIZE_AVX2 + return _mm256_cmpeq_epi64(a, b); +#else + Packet4i a1, a2, b1, b2; + split_two(a, a1, a2); + split_two(b, b1, b2); + return combine_two((Packet4i)_mm_cmpeq_epi64(a1, b1), (Packet4i)_mm_cmpeq_epi64(a2, b2)); +#endif + } + + template<> + EIGEN_STRONG_INLINE Packet8i pmuluadd64(const Packet8i& a, uint64_t b, uint64_t c) + { + uint64_t u[4]; + _mm256_storeu_si256((__m256i*)u, a); + u[0] = u[0] * b + c; + u[1] = u[1] * b + c; + u[2] = u[2] * b + c; + u[3] = u[3] * b + c; + return _mm256_loadu_si256((__m256i*)u); + } + + EIGEN_STRONG_INLINE __m256d uint64_to_double(__m256i x) { + auto y = _mm256_or_pd(_mm256_castsi256_pd(x), _mm256_set1_pd(0x0010000000000000)); + return _mm256_sub_pd(y, _mm256_set1_pd(0x0010000000000000)); + } + + EIGEN_STRONG_INLINE __m256d int64_to_double(__m256i x) { + x = padd64(x, _mm256_castpd_si256(_mm256_set1_pd(0x0018000000000000))); + return _mm256_sub_pd(_mm256_castsi256_pd(x), _mm256_set1_pd(0x0018000000000000)); + } + + EIGEN_STRONG_INLINE __m256i double_to_int64(__m256d x) { + x = _mm256_add_pd(x, _mm256_set1_pd(0x0018000000000000)); + return psub64( + _mm256_castpd_si256(x), + _mm256_castpd_si256(_mm256_set1_pd(0x0018000000000000)) + ); + } + + template<> + EIGEN_STRONG_INLINE Packet8i pcast64(const Packet4d& a) + { + return double_to_int64(a); + } + + template<> + EIGEN_STRONG_INLINE Packet4d pcast64(const Packet8i& a) + { + return int64_to_double(a); + } + + template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED + Packet4d psin(const Packet4d& x) + { + return _psin(x); + } + + template <> + EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4d + plog(const Packet4d& _x) { + Packet4d x = _x; + _EIGEN_DECLARE_CONST_Packet4d(1, 1.0); + _EIGEN_DECLARE_CONST_Packet4d(half, 0.5); + + auto inv_mant_mask = _mm256_castsi256_pd(pseti64(~0x7ff0000000000000)); + auto min_norm_pos = _mm256_castsi256_pd(pseti64(0x10000000000000)); + auto minus_inf = _mm256_castsi256_pd(pseti64(0xfff0000000000000)); + + // Polynomial coefficients. 
+ _EIGEN_DECLARE_CONST_Packet4d(cephes_SQRTHF, 0.707106781186547524); + _EIGEN_DECLARE_CONST_Packet4d(cephes_log_p0, 7.0376836292E-2); + _EIGEN_DECLARE_CONST_Packet4d(cephes_log_p1, -1.1514610310E-1); + _EIGEN_DECLARE_CONST_Packet4d(cephes_log_p2, 1.1676998740E-1); + _EIGEN_DECLARE_CONST_Packet4d(cephes_log_p3, -1.2420140846E-1); + _EIGEN_DECLARE_CONST_Packet4d(cephes_log_p4, +1.4249322787E-1); + _EIGEN_DECLARE_CONST_Packet4d(cephes_log_p5, -1.6668057665E-1); + _EIGEN_DECLARE_CONST_Packet4d(cephes_log_p6, +2.0000714765E-1); + _EIGEN_DECLARE_CONST_Packet4d(cephes_log_p7, -2.4999993993E-1); + _EIGEN_DECLARE_CONST_Packet4d(cephes_log_p8, +3.3333331174E-1); + _EIGEN_DECLARE_CONST_Packet4d(cephes_log_q1, -2.12194440e-4); + _EIGEN_DECLARE_CONST_Packet4d(cephes_log_q2, 0.693359375); + + Packet4d invalid_mask = _mm256_cmp_pd(x, _mm256_setzero_pd(), _CMP_NGE_UQ); // not greater equal is true if x is NaN + Packet4d iszero_mask = _mm256_cmp_pd(x, _mm256_setzero_pd(), _CMP_EQ_OQ); + + // Truncate input values to the minimum positive normal. + x = pmax(x, min_norm_pos); + + Packet4d emm0 = uint64_to_double(psrl64(_mm256_castpd_si256(x), 52)); + Packet4d e = psub(emm0, pset1(1022)); + + // Set the exponents to -1, i.e. x are in the range [0.5,1). + x = _mm256_and_pd(x, inv_mant_mask); + x = _mm256_or_pd(x, p4d_half); + + // part2: Shift the inputs from the range [0.5,1) to [sqrt(1/2),sqrt(2)) + // and shift by -1. The values are then centered around 0, which improves + // the stability of the polynomial evaluation. + // if( x < SQRTHF ) { + // e -= 1; + // x = x + x - 1.0; + // } else { x = x - 1.0; } + Packet4d mask = _mm256_cmp_pd(x, p4d_cephes_SQRTHF, _CMP_LT_OQ); + Packet4d tmp = _mm256_and_pd(x, mask); + x = psub(x, p4d_1); + e = psub(e, _mm256_and_pd(p4d_1, mask)); + x = padd(x, tmp); + + Packet4d x2 = pmul(x, x); + Packet4d x3 = pmul(x2, x); + + // Evaluate the polynomial approximant of degree 8 in three parts, probably + // to improve instruction-level parallelism. + Packet4d y, y1, y2; + y = pmadd(p4d_cephes_log_p0, x, p4d_cephes_log_p1); + y1 = pmadd(p4d_cephes_log_p3, x, p4d_cephes_log_p4); + y2 = pmadd(p4d_cephes_log_p6, x, p4d_cephes_log_p7); + y = pmadd(y, x, p4d_cephes_log_p2); + y1 = pmadd(y1, x, p4d_cephes_log_p5); + y2 = pmadd(y2, x, p4d_cephes_log_p8); + y = pmadd(y, x3, y1); + y = pmadd(y, x3, y2); + y = pmul(y, x3); + + // Add the logarithm of the exponent back to the result of the interpolation. + y1 = pmul(e, p4d_cephes_log_q1); + tmp = pmul(x2, p4d_half); + y = padd(y, y1); + x = psub(x, tmp); + y2 = pmul(e, p4d_cephes_log_q2); + x = padd(x, y); + x = padd(x, y2); + + // Filter out invalid inputs, i.e. negative arg will be NAN, 0 will be -INF. 
+ return pblendv(iszero_mask, minus_inf, _mm256_or_pd(x, invalid_mask)); + } + +#if EIGEN_VERSION_AT_LEAST(3,3,5) +#else + template<> EIGEN_STRONG_INLINE Packet4f pcast(const Packet4i& a) { + return _mm_cvtepi32_ps(a); + } + + template<> EIGEN_STRONG_INLINE Packet4i pcast(const Packet4f& a) { + return _mm_cvttps_epi32(a); + } +#endif + } +} + +#endif \ No newline at end of file diff --git a/EigenRand/arch/NEON/MorePacketMath.h b/EigenRand/arch/NEON/MorePacketMath.h new file mode 100644 index 0000000..b73b828 --- /dev/null +++ b/EigenRand/arch/NEON/MorePacketMath.h @@ -0,0 +1,65 @@ +/** + * @file MorePacketMath.h + * @author bab2min (bab2min@gmail.com) + * @brief + * @version 0.4.0 + * @date 2021-04-26 + * + * @copyright Copyright (c) 2020-2021 + * + */ + +#ifndef EIGENRAND_MORE_PACKET_MATH_NEON_H +#define EIGENRAND_MORE_PACKET_MATH_NEON_H + +#include + +namespace Eigen +{ + namespace internal + { + template<> + struct IsIntPacket : std::true_type {}; + + template<> + struct IsFloatPacket : std::true_type {}; + + template<> + struct HalfPacket + { + using type = uint64_t; + }; + + + template<> + struct reinterpreter + { + EIGEN_STRONG_INLINE Packet4f to_float(const Packet4i& x) + { + return vreinterpretq_f32_s32(x); + } + + EIGEN_STRONG_INLINE Packet4i to_int(const Packet4i& x) + { + return x; + } + }; + + template<> + struct reinterpreter + { + EIGEN_STRONG_INLINE Packet4f to_float(const Packet4f& x) + { + return x; + } + + EIGEN_STRONG_INLINE Packet4i to_int(const Packet4f& x) + { + return vreinterpretq_s32_f32(x); + } + }; + + } +} + +#endif \ No newline at end of file diff --git a/EigenRand/arch/SSE/MorePacketMath.h b/EigenRand/arch/SSE/MorePacketMath.h new file mode 100644 index 0000000..646f6ea --- /dev/null +++ b/EigenRand/arch/SSE/MorePacketMath.h @@ -0,0 +1,487 @@ +/** + * @file MorePacketMath.h + * @author bab2min (bab2min@gmail.com) + * @brief + * @version 0.3.5 + * @date 2021-07-16 + * + * @copyright Copyright (c) 2020-2021 + * + */ + +#ifndef EIGENRAND_MORE_PACKET_MATH_SSE_H +#define EIGENRAND_MORE_PACKET_MATH_SSE_H + +#include + +namespace Eigen +{ + namespace internal + { + template<> + struct IsIntPacket : std::true_type {}; + + template<> + struct IsFloatPacket : std::true_type {}; + + template<> + struct IsDoublePacket : std::true_type {}; + + template<> + struct HalfPacket + { + using type = uint64_t; + }; + +#ifdef EIGEN_VECTORIZE_AVX +#else + template<> + struct HalfPacket + { + //using type = Packet2f; + }; +#endif + template<> + struct reinterpreter + { + EIGEN_STRONG_INLINE Packet4f to_float(const Packet4i& x) + { + return _mm_castsi128_ps(x); + } + + EIGEN_STRONG_INLINE Packet2d to_double(const Packet4i& x) + { + return _mm_castsi128_pd(x); + } + + EIGEN_STRONG_INLINE Packet4i to_int(const Packet4i& x) + { + return x; + } + }; + + template<> + struct reinterpreter + { + EIGEN_STRONG_INLINE Packet4f to_float(const Packet4f& x) + { + return x; + } + + EIGEN_STRONG_INLINE Packet2d to_double(const Packet4f& x) + { + return _mm_castps_pd(x); + } + + EIGEN_STRONG_INLINE Packet4i to_int(const Packet4f& x) + { + return _mm_castps_si128(x); + } + }; + + template<> + struct reinterpreter + { + EIGEN_STRONG_INLINE Packet4f to_float(const Packet2d& x) + { + return _mm_castpd_ps(x); + } + + EIGEN_STRONG_INLINE Packet2d to_double(const Packet2d& x) + { + return x; + } + + EIGEN_STRONG_INLINE Packet4i to_int(const Packet2d& x) + { + return _mm_castpd_si128(x); + } + }; + + template<> + EIGEN_STRONG_INLINE void split_two(const Packet4i& x, uint64_t& a, uint64_t& b) + { +#ifdef 
EIGEN_VECTORIZE_SSE4_1 + a = _mm_extract_epi64(x, 0); + b = _mm_extract_epi64(x, 1); +#else + uint64_t u[2]; + _mm_storeu_si128((__m128i*)u, x); + a = u[0]; + b = u[1]; +#endif + } + + EIGEN_STRONG_INLINE Packet4i combine_low32(const Packet4i& a, const Packet4i& b) + { + auto sa = _mm_shuffle_epi32(a, _MM_SHUFFLE(3, 1, 2, 0)); + auto sb = _mm_shuffle_epi32(b, _MM_SHUFFLE(2, 0, 3, 1)); + sa = _mm_and_si128(sa, _mm_setr_epi32(-1, -1, 0, 0)); + sb = _mm_and_si128(sb, _mm_setr_epi32(0, 0, -1, -1)); + return _mm_or_si128(sa, sb); + } + + template<> + EIGEN_STRONG_INLINE Packet4i pseti64(uint64_t a) + { + return _mm_set1_epi64x(a); + } + + template<> + EIGEN_STRONG_INLINE Packet4i padd64(const Packet4i& a, const Packet4i& b) + { + return _mm_add_epi64(a, b); + } + + template<> + EIGEN_STRONG_INLINE Packet4i psub64(const Packet4i& a, const Packet4i& b) + { + return _mm_sub_epi64(a, b); + } + + template<> + EIGEN_STRONG_INLINE Packet4i pcmpeq(const Packet4i& a, const Packet4i& b) + { + return _mm_cmpeq_epi32(a, b); + } + + template<> + EIGEN_STRONG_INLINE Packet4i psll(const Packet4i& a, int b) + { + return _mm_slli_epi32(a, b); + } + + template<> + EIGEN_STRONG_INLINE Packet4i psrl(const Packet4i& a, int b) + { + return _mm_srli_epi32(a, b); + } + + + template<> + EIGEN_STRONG_INLINE Packet4i psll64(const Packet4i& a, int b) + { + return _mm_slli_epi64(a, b); + } + + template<> + EIGEN_STRONG_INLINE Packet4i psrl64(const Packet4i& a, int b) + { + return _mm_srli_epi64(a, b); + } + + template<> + EIGEN_STRONG_INLINE Packet4i pcmplt(const Packet4i& a, const Packet4i& b) + { + return _mm_cmplt_epi32(a, b); + } + + template<> + EIGEN_STRONG_INLINE Packet4i pcmplt64(const Packet4i& a, const Packet4i& b) + { +#ifdef EIGEN_VECTORIZE_SSE4_2 + return _mm_cmpgt_epi64(b, a); +#else + int64_t u[2], v[2]; + _mm_storeu_si128((__m128i*)u, a); + _mm_storeu_si128((__m128i*)v, b); + return _mm_set_epi64x(u[1] < v[1] ? -1 : 0, u[0] < v[0] ? 
-1 : 0); +#endif + } + + template<> + EIGEN_STRONG_INLINE Packet4f pcmplt(const Packet4f& a, const Packet4f& b) + { + return _mm_cmplt_ps(a, b); + } + + template<> + EIGEN_STRONG_INLINE Packet4f pcmple(const Packet4f& a, const Packet4f& b) + { + return _mm_cmple_ps(a, b); + } + + template<> + EIGEN_STRONG_INLINE Packet2d pcmplt(const Packet2d& a, const Packet2d& b) + { + return _mm_cmplt_pd(a, b); + } + + template<> + EIGEN_STRONG_INLINE Packet2d pcmple(const Packet2d& a, const Packet2d& b) + { + return _mm_cmple_pd(a, b); + } + + template<> + EIGEN_STRONG_INLINE Packet4f pblendv(const Packet4f& ifPacket, const Packet4f& thenPacket, const Packet4f& elsePacket) + { +#ifdef EIGEN_VECTORIZE_SSE4_1 + return _mm_blendv_ps(elsePacket, thenPacket, ifPacket); +#else + return _mm_or_ps(_mm_and_ps(ifPacket, thenPacket), _mm_andnot_ps(ifPacket, elsePacket)); +#endif + } + + template<> + EIGEN_STRONG_INLINE Packet4f pblendv(const Packet4i& ifPacket, const Packet4f& thenPacket, const Packet4f& elsePacket) + { + return pblendv(_mm_castsi128_ps(ifPacket), thenPacket, elsePacket); + } + + template<> + EIGEN_STRONG_INLINE Packet4i pblendv(const Packet4i& ifPacket, const Packet4i& thenPacket, const Packet4i& elsePacket) + { +#ifdef EIGEN_VECTORIZE_SSE4_1 + return _mm_castps_si128(_mm_blendv_ps(_mm_castsi128_ps(elsePacket), _mm_castsi128_ps(thenPacket), _mm_castsi128_ps(ifPacket))); +#else + return _mm_or_si128(_mm_and_si128(ifPacket, thenPacket), _mm_andnot_si128(ifPacket, elsePacket)); +#endif + } + + template<> + EIGEN_STRONG_INLINE Packet2d pblendv(const Packet2d& ifPacket, const Packet2d& thenPacket, const Packet2d& elsePacket) + { +#ifdef EIGEN_VECTORIZE_SSE4_1 + return _mm_blendv_pd(elsePacket, thenPacket, ifPacket); +#else + return _mm_or_pd(_mm_and_pd(ifPacket, thenPacket), _mm_andnot_pd(ifPacket, elsePacket)); +#endif + } + + + template<> + EIGEN_STRONG_INLINE Packet2d pblendv(const Packet4i& ifPacket, const Packet2d& thenPacket, const Packet2d& elsePacket) + { + return pblendv(_mm_castsi128_pd(ifPacket), thenPacket, elsePacket); + } + + template<> + EIGEN_STRONG_INLINE Packet4i pgather(const int* addr, const Packet4i& index) + { +#ifdef EIGEN_VECTORIZE_AVX2 + return _mm_i32gather_epi32(addr, index, 4); +#else + uint32_t u[4]; + _mm_storeu_si128((__m128i*)u, index); + return _mm_setr_epi32(addr[u[0]], addr[u[1]], addr[u[2]], addr[u[3]]); +#endif + } + + template<> + EIGEN_STRONG_INLINE Packet4f pgather(const float* addr, const Packet4i& index) + { +#ifdef EIGEN_VECTORIZE_AVX2 + return _mm_i32gather_ps(addr, index, 4); +#else + uint32_t u[4]; + _mm_storeu_si128((__m128i*)u, index); + return _mm_setr_ps(addr[u[0]], addr[u[1]], addr[u[2]], addr[u[3]]); +#endif + } + + template<> + EIGEN_STRONG_INLINE Packet2d pgather(const double* addr, const Packet4i& index, bool upperhalf) + { +#ifdef EIGEN_VECTORIZE_AVX2 + return _mm_i32gather_pd(addr, index, 8); +#else + uint32_t u[4]; + _mm_storeu_si128((__m128i*)u, index); + if (upperhalf) + { + return _mm_setr_pd(addr[u[2]], addr[u[3]]); + } + else + { + return _mm_setr_pd(addr[u[0]], addr[u[1]]); + } +#endif + } + + template<> + EIGEN_STRONG_INLINE int pmovemask(const Packet4f& a) + { + return _mm_movemask_ps(a); + } + + template<> + EIGEN_STRONG_INLINE int pmovemask(const Packet2d& a) + { + return _mm_movemask_pd(a); + } + + template<> + EIGEN_STRONG_INLINE int pmovemask(const Packet4i& a) + { + return pmovemask((Packet4f)_mm_castsi128_ps(a)); + } + + template<> + EIGEN_STRONG_INLINE Packet4f ptruncate(const Packet4f& a) + { +#ifdef EIGEN_VECTORIZE_SSE4_1 + 
return _mm_round_ps(a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); +#else + auto round = _MM_GET_ROUNDING_MODE(); + _MM_SET_ROUNDING_MODE(_MM_ROUND_TOWARD_ZERO); + auto ret = _mm_cvtepi32_ps(_mm_cvtps_epi32(a)); + _MM_SET_ROUNDING_MODE(round); + return ret; +#endif + } + + template<> + EIGEN_STRONG_INLINE Packet2d ptruncate(const Packet2d& a) + { +#ifdef EIGEN_VECTORIZE_SSE4_1 + return _mm_round_pd(a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); +#else + auto round = _MM_GET_ROUNDING_MODE(); + _MM_SET_ROUNDING_MODE(_MM_ROUND_TOWARD_ZERO); + auto ret = _mm_cvtepi32_pd(_mm_cvtpd_epi32(a)); + _MM_SET_ROUNDING_MODE(round); + return ret; +#endif + } + + template<> + EIGEN_STRONG_INLINE Packet4i pcmpeq64(const Packet4i& a, const Packet4i& b) + { +#ifdef EIGEN_VECTORIZE_SSE4_1 + return _mm_cmpeq_epi64(a, b); +#else + Packet4i c = _mm_cmpeq_epi32(a, b); + return pand(c, (Packet4i)_mm_shuffle_epi32(c, _MM_SHUFFLE(2, 3, 0, 1))); +#endif + } + + template<> + EIGEN_STRONG_INLINE Packet4i pmuluadd64(const Packet4i& a, uint64_t b, uint64_t c) + { + uint64_t u[2]; + _mm_storeu_si128((__m128i*)u, a); + u[0] = u[0] * b + c; + u[1] = u[1] * b + c; + return _mm_loadu_si128((__m128i*)u); + } + + EIGEN_STRONG_INLINE __m128d uint64_to_double(__m128i x) { + x = _mm_or_si128(x, _mm_castpd_si128(_mm_set1_pd(0x0010000000000000))); + return _mm_sub_pd(_mm_castsi128_pd(x), _mm_set1_pd(0x0010000000000000)); + } + + EIGEN_STRONG_INLINE __m128d int64_to_double(__m128i x) { + x = _mm_add_epi64(x, _mm_castpd_si128(_mm_set1_pd(0x0018000000000000))); + return _mm_sub_pd(_mm_castsi128_pd(x), _mm_set1_pd(0x0018000000000000)); + } + + EIGEN_STRONG_INLINE __m128i double_to_int64(__m128d x) { + x = _mm_add_pd(x, _mm_set1_pd(0x0018000000000000)); + return _mm_sub_epi64( + _mm_castpd_si128(x), + _mm_castpd_si128(_mm_set1_pd(0x0018000000000000)) + ); + } + + template<> + EIGEN_STRONG_INLINE Packet4i pcast64(const Packet2d& a) + { + return double_to_int64(a); + } + + template<> + EIGEN_STRONG_INLINE Packet2d pcast64(const Packet4i& a) + { + return int64_to_double(a); + } + + template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED + Packet2d psin(const Packet2d& x) + { + return _psin(x); + } + + template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED + Packet2d plog(const Packet2d& _x) + { + Packet2d x = _x; + _EIGEN_DECLARE_CONST_Packet2d(1, 1.0f); + _EIGEN_DECLARE_CONST_Packet2d(half, 0.5f); + _EIGEN_DECLARE_CONST_Packet4i(0x7f, 0x7f); + + auto inv_mant_mask = _mm_castsi128_pd(pseti64(~0x7ff0000000000000)); + auto min_norm_pos = _mm_castsi128_pd(pseti64(0x10000000000000)); + auto minus_inf = _mm_castsi128_pd(pseti64(0xfff0000000000000)); + + /* natural logarithm computed for 4 simultaneous float + return NaN for x <= 0 + */ + _EIGEN_DECLARE_CONST_Packet2d(cephes_SQRTHF, 0.707106781186547524); + _EIGEN_DECLARE_CONST_Packet2d(cephes_log_p0, 7.0376836292E-2); + _EIGEN_DECLARE_CONST_Packet2d(cephes_log_p1, -1.1514610310E-1); + _EIGEN_DECLARE_CONST_Packet2d(cephes_log_p2, 1.1676998740E-1); + _EIGEN_DECLARE_CONST_Packet2d(cephes_log_p3, -1.2420140846E-1); + _EIGEN_DECLARE_CONST_Packet2d(cephes_log_p4, +1.4249322787E-1); + _EIGEN_DECLARE_CONST_Packet2d(cephes_log_p5, -1.6668057665E-1); + _EIGEN_DECLARE_CONST_Packet2d(cephes_log_p6, +2.0000714765E-1); + _EIGEN_DECLARE_CONST_Packet2d(cephes_log_p7, -2.4999993993E-1); + _EIGEN_DECLARE_CONST_Packet2d(cephes_log_p8, +3.3333331174E-1); + _EIGEN_DECLARE_CONST_Packet2d(cephes_log_q1, -2.12194440e-4); + _EIGEN_DECLARE_CONST_Packet2d(cephes_log_q2, 0.693359375); + + 
+ Packet4i emm0; + + Packet2d invalid_mask = _mm_cmpnge_pd(x, _mm_setzero_pd()); // not greater equal is true if x is NaN + Packet2d iszero_mask = _mm_cmpeq_pd(x, _mm_setzero_pd()); + + x = pmax(x, min_norm_pos); /* cut off denormalized stuff */ + emm0 = _mm_srli_epi64(_mm_castpd_si128(x), 52); + + /* keep only the fractional part */ + x = _mm_and_pd(x, inv_mant_mask); + x = _mm_or_pd(x, p2d_half); + + Packet2d e = _mm_sub_pd(uint64_to_double(emm0), pset1(1022)); + + /* part2: + if( x < SQRTHF ) { + e -= 1; + x = x + x - 1.0; + } else { x = x - 1.0; } + */ + Packet2d mask = _mm_cmplt_pd(x, p2d_cephes_SQRTHF); + Packet2d tmp = pand(x, mask); + x = psub(x, p2d_1); + e = psub(e, pand(p2d_1, mask)); + x = padd(x, tmp); + + Packet2d x2 = pmul(x, x); + Packet2d x3 = pmul(x2, x); + + Packet2d y, y1, y2; + y = pmadd(p2d_cephes_log_p0, x, p2d_cephes_log_p1); + y1 = pmadd(p2d_cephes_log_p3, x, p2d_cephes_log_p4); + y2 = pmadd(p2d_cephes_log_p6, x, p2d_cephes_log_p7); + y = pmadd(y, x, p2d_cephes_log_p2); + y1 = pmadd(y1, x, p2d_cephes_log_p5); + y2 = pmadd(y2, x, p2d_cephes_log_p8); + y = pmadd(y, x3, y1); + y = pmadd(y, x3, y2); + y = pmul(y, x3); + + y1 = pmul(e, p2d_cephes_log_q1); + tmp = pmul(x2, p2d_half); + y = padd(y, y1); + x = psub(x, tmp); + y2 = pmul(e, p2d_cephes_log_q2); + x = padd(x, y); + x = padd(x, y2); + // negative arg will be NAN, 0 will be -INF + return pblendv(iszero_mask, minus_inf, _mm_or_pd(x, invalid_mask)); + } + } +} + +#endif diff --git a/README.md b/README.md index 6ce0fe0..cd5ac45 100644 --- a/README.md +++ b/README.md @@ -347,6 +347,11 @@ The results of EigenRand and C++ std appear to be equivalent within the margin o MIT License ## History +### 0.3.5 (2021-07-16) +* Now `UniformRealGen` generates accurate double values. +* Fixed a bug where non-vectorized double-type `NormalGen` would get stuck in an infinite loop. +* New overloading functions `balanced` and `balancedLike` which generate values over `[a, b]` were added. + ### 0.3.4 (2021-04-25) * Now Eigen 3.3.4 - 3.3.6 versions are additionally supported. 
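For the `balanced` / `balancedLike` additions noted in the 0.3.5 history above, a minimal usage sketch follows. It is not part of the patch: the umbrella `<EigenRand/EigenRand>` include, the matrix sizes, and the `[0.5, 2]` range are illustrative assumptions, but the call shapes mirror the cases exercised by the new `test/test.cpp` later in this patch.

```cpp
// Sketch only: generating values over [-1, 1] and over a caller-chosen interval [a, b].
// Assumes the umbrella EigenRand header; sizes and ranges here are illustrative.
#include <iostream>
#include <Eigen/Dense>
#include <EigenRand/EigenRand>

int main()
{
    Eigen::Rand::Vmt19937_64 gen{ 42 };

    // Pre-0.3.5 form: uniform values over [-1, 1].
    Eigen::MatrixXf a = Eigen::Rand::balanced<Eigen::MatrixXf>(4, 4, gen);

    // New in 0.3.5: uniform values over [a, b], here [0.5, 2],
    // matching the five-argument calls in test/test.cpp.
    Eigen::MatrixXf b = Eigen::Rand::balanced<Eigen::MatrixXf>(4, 4, gen, 0.5f, 2.f);

    std::cout << a << "\n\n" << b << std::endl;
    return 0;
}
```

Under the hood this appears to map to the new packet-level `balanced(Rng&, Scalar slope, Scalar bias)` routine added to `RandUtils.h` earlier in this patch, which scales a uniform draw as `slope * u + bias`; presumably `slope = b - a` and `bias = a` for the `[a, b]` overload.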
diff --git a/TestAccuracy.vcxproj b/TestAccuracy.vcxproj index a21dced..92eefe4 100644 --- a/TestAccuracy.vcxproj +++ b/TestAccuracy.vcxproj @@ -1,10 +1,34 @@ + + Debug + ARM + + + Debug + ARM64 + Debug Win32 + + RelAVX2 + ARM + + + RelAVX2 + ARM64 + + + RelAVX + ARM + + + RelAVX + ARM64 + RelAVX Win32 @@ -21,6 +45,14 @@ RelAVX2 x64 + + RelNoVect + ARM + + + RelNoVect + ARM64 + RelNoVect Win32 @@ -29,6 +61,14 @@ RelNoVect x64 + + RelSSE2 + ARM + + + RelSSE2 + ARM64 + RelSSE2 Win32 @@ -43,7 +83,7 @@ - + 15.0 @@ -51,7 +91,7 @@ Win32Proj EigenAddOn 10.0 - TestAccuracy + Accuracy @@ -94,6 +134,18 @@ v142 Unicode + + Application + true + v142 + Unicode + + + Application + true + v142 + Unicode + Application false @@ -101,6 +153,20 @@ true Unicode + + Application + false + v142 + true + Unicode + + + Application + false + v142 + true + Unicode + Application false @@ -108,6 +174,20 @@ true Unicode + + Application + false + v142 + true + Unicode + + + Application + false + v142 + true + Unicode + Application false @@ -115,6 +195,20 @@ true Unicode + + Application + false + v142 + true + Unicode + + + Application + false + v142 + true + Unicode + Application false @@ -122,6 +216,20 @@ true Unicode + + Application + false + v142 + true + Unicode + + + Application + false + v142 + true + Unicode + @@ -145,18 +253,48 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true @@ -166,6 +304,14 @@ true $(SolutionDir);E:\AddInclude;$(IncludePath) + + true + $(SolutionDir);E:\AddInclude;$(IncludePath) + + + true + $(SolutionDir);E:\AddInclude;$(IncludePath) + false E:\AddInclude;$(IncludePath) @@ -186,18 +332,50 @@ false $(SolutionDir);E:\AddInclude;$(IncludePath) + + false + $(SolutionDir);E:\AddInclude;$(IncludePath) + + + false + $(SolutionDir);E:\AddInclude;$(IncludePath) + false $(SolutionDir);E:\AddInclude;$(IncludePath) + + false + $(SolutionDir);E:\AddInclude;$(IncludePath) + + + false + $(SolutionDir);E:\AddInclude;$(IncludePath) + false $(SolutionDir);E:\AddInclude;$(IncludePath) + + false + $(SolutionDir);E:\AddInclude;$(IncludePath) + + + false + $(SolutionDir);E:\AddInclude;$(IncludePath) + false $(SolutionDir);E:\AddInclude;$(IncludePath) + + false + $(SolutionDir);E:\AddInclude;$(IncludePath) + + + false + $(SolutionDir);E:\AddInclude;$(IncludePath) + @@ -229,6 +407,36 @@ true + + + + + Level3 + Disabled + true + __ARM_NEON;USE_ADDON;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + + + Console + true + + + + + + + Level3 + Disabled + true + __ARM_NEON;USE_ADDON;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + + + Console + true + + @@ -326,6 +534,46 @@ true + + + + + Level3 + MaxSpeed + true + true + true + __ARM_NEON;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + NotSet + + + Console + true + true + true + + + + + + + Level3 + MaxSpeed + true + true + true + __ARM_NEON;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + NotSet + + + Console + true + true + true + + @@ -345,6 +593,44 @@ true + + + + + Level3 + MaxSpeed + true + true + true + __ARM_NEON;EIGEN_DONT_VECTORIZE;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + + + Console + true + true + true + + + + + + + Level3 + MaxSpeed + true + true + true + __ARM_NEON;EIGEN_DONT_VECTORIZE;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + + + Console + true + true + true + + @@ -365,6 +651,44 @@ true + + + + + Level3 + MaxSpeed + true + true + true + __ARM_NEON;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + + + Console + true + true + true + + + + + + + Level3 + MaxSpeed + true + true + true + 
__ARM_NEON;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + + + Console + true + true + true + + @@ -385,6 +709,44 @@ true + + + + + Level3 + MaxSpeed + true + true + true + __ARM_NEON;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + + + Console + true + true + true + + + + + + + Level3 + MaxSpeed + true + true + true + __ARM_NEON;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + + + Console + true + true + true + + diff --git a/TestAccuracy.vcxproj.filters b/TestAccuracy.vcxproj.filters index 1a9c43c..7ff5bf8 100644 --- a/TestAccuracy.vcxproj.filters +++ b/TestAccuracy.vcxproj.filters @@ -6,7 +6,7 @@ - + Source Files diff --git a/test/accuracy.cpp b/benchmark/accuracy.cpp similarity index 99% rename from test/accuracy.cpp rename to benchmark/accuracy.cpp index fa450ae..fc06289 100644 --- a/test/accuracy.cpp +++ b/benchmark/accuracy.cpp @@ -182,7 +182,6 @@ std::map test_eigenrand_cont(size_t size, size_t step, size { std::map ret; Eigen::ArrayXf arr{ size }; - Eigen::ArrayXd arrd{ size }; Rng urng{ seed }; arr = Eigen::Rand::balancedLike(arr, urng); @@ -191,99 +190,101 @@ std::map test_eigenrand_cont(size_t size, size_t step, size arr = Eigen::Rand::uniformRealLike(arr, urng); ret["uniformReal"] = calc_emd_with_cdf(arr, ur_cdf, step); - arrd = Eigen::Rand::uniformRealLike(arrd, urng); - ret["uniformReal/double"] = calc_emd_with_cdf(arrd, ur_cdf, step); - arr = Eigen::Rand::normalLike(arr, urng); ret["normal"] = calc_emd_with_cdf(arr, normal_cdf, step); - arrd = Eigen::Rand::normalLike(arrd, urng); - ret["normal/double"] = calc_emd_with_cdf(arrd, normal_cdf, step); - arr = Eigen::Rand::lognormalLike(arr, urng); ret["lognormal"] = calc_emd_with_cdf(arr, lognormal_cdf, step); - arrd = Eigen::Rand::lognormalLike(arrd, urng); - ret["lognormal/double"] = calc_emd_with_cdf(arrd, lognormal_cdf, step); - arr = Eigen::Rand::gammaLike(arr, urng, 1, 1); ret["gamma(1,1)"] = calc_emd_with_pdf(arr, gamma11_pdf, step); - arrd = Eigen::Rand::gammaLike(arrd, urng, 1, 1); - ret["gamma(1,1)/double"] = calc_emd_with_pdf(arrd, gamma11_pdf, step); - arr = Eigen::Rand::gammaLike(arr, urng, 5, 1); ret["gamma(5,1)"] = calc_emd_with_pdf(arr, gamma51_pdf, step); - arrd = Eigen::Rand::gammaLike(arrd, urng, 5, 1); - ret["gamma(5,1)/double"] = calc_emd_with_pdf(arrd, gamma51_pdf, step); - arr = Eigen::Rand::gammaLike(arr, urng, 0.2, 1); ret["gamma(0.2,1)"] = calc_emd_with_pdf(arr, gamma21_pdf, step); - arrd = Eigen::Rand::gammaLike(arrd, urng, 0.2, 1); - ret["gamma(0.2,1)/double"] = calc_emd_with_pdf(arrd, gamma21_pdf, step); - arr = Eigen::Rand::exponentialLike(arr, urng); ret["exponential"] = calc_emd_with_cdf(arr, exp_cdf, step); - arrd = Eigen::Rand::exponentialLike(arrd, urng); - ret["exponential/double"] = calc_emd_with_cdf(arrd, exp_cdf, step); - arr = Eigen::Rand::weibullLike(arr, urng, 2); ret["weibull(2,1)"] = calc_emd_with_cdf(arr, weibull_cdf, step); - arrd = Eigen::Rand::weibullLike(arrd, urng, 2); - ret["weibull(2,1)/double"] = calc_emd_with_cdf(arrd, weibull_cdf, step); - arr = Eigen::Rand::extremeValueLike(arr, urng, 1, 1); ret["extremeValue(1,1)"] = calc_emd_with_cdf(arr, extreme_value_cdf, step); - arrd = Eigen::Rand::extremeValueLike(arrd, urng, 1, 1); - ret["extremeValue(1,1)/double"] = calc_emd_with_cdf(arrd, extreme_value_cdf, step); - arr = Eigen::Rand::chiSquaredLike(arr, urng, 7); ret["chiSquared(7)"] = calc_emd_with_pdf(arr, chisquared_pdf, step); - arrd = Eigen::Rand::chiSquaredLike(arrd, urng, 7); - ret["chiSquared(7)/double"] = calc_emd_with_pdf(arrd, chisquared_pdf, step); - arr = 
Eigen::Rand::cauchyLike(arr, urng); ret["cauchy"] = calc_emd_with_cdf(arr, cauchy_cdf, step); - arrd = Eigen::Rand::cauchyLike(arrd, urng); - ret["cauchy/double"] = calc_emd_with_cdf(arrd, cauchy_cdf, step); - arr = Eigen::Rand::studentTLike(arr, urng, 1); ret["studentT(1)"] = calc_emd_with_cdf(arr, cauchy_cdf, step); - arrd = Eigen::Rand::studentTLike(arrd, urng, 1); - ret["studentT(1)/double"] = calc_emd_with_cdf(arrd, cauchy_cdf, step); - arr = Eigen::Rand::studentTLike(arr, urng, 5); ret["studentT(5)"] = calc_emd_with_pdf(arr, student5_pdf, step); - arrd = Eigen::Rand::studentTLike(arrd, urng, 5); - ret["studentT(5)/double"] = calc_emd_with_pdf(arrd, student5_pdf, step); - arr = Eigen::Rand::studentTLike(arr, urng, 20); ret["studentT(20)"] = calc_emd_with_pdf(arr, student20_pdf, step); - arrd = Eigen::Rand::studentTLike(arrd, urng, 20); - ret["studentT(20)/double"] = calc_emd_with_pdf(arrd, student20_pdf, step); - arr = Eigen::Rand::fisherFLike(arr, urng, 1, 1); ret["fisherF(1,1)"] = calc_emd_with_cdf(arr, fisher11_cdf, step); - arrd = Eigen::Rand::fisherFLike(arrd, urng, 1, 1); - ret["fisherF(1,1)/double"] = calc_emd_with_cdf(arrd, fisher11_cdf, step); - arr = Eigen::Rand::fisherFLike(arr, urng, 5, 5); ret["fisherF(5,5)"] = calc_emd_with_pdf(arr, fisher55_pdf, step); +#ifdef TEST_DOUBLE + Eigen::ArrayXd arrd{ size }; + arrd = Eigen::Rand::uniformRealLike(arrd, urng); + ret["uniformReal/double"] = calc_emd_with_cdf(arrd, ur_cdf, step); + + arrd = Eigen::Rand::normalLike(arrd, urng); + ret["normal/double"] = calc_emd_with_cdf(arrd, normal_cdf, step); + + arrd = Eigen::Rand::lognormalLike(arrd, urng); + ret["lognormal/double"] = calc_emd_with_cdf(arrd, lognormal_cdf, step); + + arrd = Eigen::Rand::gammaLike(arrd, urng, 1, 1); + ret["gamma(1,1)/double"] = calc_emd_with_pdf(arrd, gamma11_pdf, step); + + arrd = Eigen::Rand::gammaLike(arrd, urng, 5, 1); + ret["gamma(5,1)/double"] = calc_emd_with_pdf(arrd, gamma51_pdf, step); + + arrd = Eigen::Rand::gammaLike(arrd, urng, 0.2, 1); + ret["gamma(0.2,1)/double"] = calc_emd_with_pdf(arrd, gamma21_pdf, step); + + arrd = Eigen::Rand::exponentialLike(arrd, urng); + ret["exponential/double"] = calc_emd_with_cdf(arrd, exp_cdf, step); + + arrd = Eigen::Rand::weibullLike(arrd, urng, 2); + ret["weibull(2,1)/double"] = calc_emd_with_cdf(arrd, weibull_cdf, step); + + arrd = Eigen::Rand::extremeValueLike(arrd, urng, 1, 1); + ret["extremeValue(1,1)/double"] = calc_emd_with_cdf(arrd, extreme_value_cdf, step); + + arrd = Eigen::Rand::chiSquaredLike(arrd, urng, 7); + ret["chiSquared(7)/double"] = calc_emd_with_pdf(arrd, chisquared_pdf, step); + + arrd = Eigen::Rand::cauchyLike(arrd, urng); + ret["cauchy/double"] = calc_emd_with_cdf(arrd, cauchy_cdf, step); + + arrd = Eigen::Rand::studentTLike(arrd, urng, 1); + ret["studentT(1)/double"] = calc_emd_with_cdf(arrd, cauchy_cdf, step); + + arrd = Eigen::Rand::studentTLike(arrd, urng, 5); + ret["studentT(5)/double"] = calc_emd_with_pdf(arrd, student5_pdf, step); + + arrd = Eigen::Rand::studentTLike(arrd, urng, 20); + ret["studentT(20)/double"] = calc_emd_with_pdf(arrd, student20_pdf, step); + + arrd = Eigen::Rand::fisherFLike(arrd, urng, 1, 1); + ret["fisherF(1,1)/double"] = calc_emd_with_cdf(arrd, fisher11_cdf, step); + arrd = Eigen::Rand::fisherFLike(arrd, urng, 5, 5); ret["fisherF(5,5)/double"] = calc_emd_with_pdf(arrd, fisher55_pdf, step); - +#endif return ret; } diff --git a/test/benchmark.cpp b/benchmark/benchmark.cpp similarity index 99% rename from test/benchmark.cpp rename to benchmark/benchmark.cpp index 
d1996de..8ecb924 100644 --- a/test/benchmark.cpp +++ b/benchmark/benchmark.cpp @@ -820,7 +820,7 @@ int main(int argc, char** argv) for (size_t i = 0; i < repeat; ++i) { - /*for (auto& p : test_rng(std::mt19937{}, size, "rng\tmt19937", results)) + for (auto& p : test_rng(std::mt19937{}, size, "rng\tmt19937", results)) { time[p.first] += p.second; timeSq[p.first] += p.second * p.second; @@ -866,7 +866,7 @@ int main(int argc, char** argv) { time[p.first] += p.second; timeSq[p.first] += p.second * p.second; - }*/ + } for (auto& p : test_eigenrand(size, "\t:ERand", results)) { diff --git a/test/benchmark_mv.cpp b/benchmark/benchmark_mv.cpp similarity index 100% rename from test/benchmark_mv.cpp rename to benchmark/benchmark_mv.cpp diff --git a/test/comp_scipy.py b/benchmark/comp_scipy.py similarity index 100% rename from test/comp_scipy.py rename to benchmark/comp_scipy.py diff --git a/doxygen/Doxyfile b/doxygen/Doxyfile index 7465b17..1f9444d 100644 --- a/doxygen/Doxyfile +++ b/doxygen/Doxyfile @@ -38,7 +38,7 @@ PROJECT_NAME = "EigenRand" # could be handy for archiving the generated documentation or if some version # control system is used. -PROJECT_NUMBER = 0.3.4 +PROJECT_NUMBER = 0.3.5 # Using the PROJECT_BRIEF tag one can provide an optional one line description # for a project that appears at the top of each page and should give viewer a diff --git a/test/packages.config b/test/packages.config new file mode 100644 index 0000000..434bb12 --- /dev/null +++ b/test/packages.config @@ -0,0 +1,4 @@ + + + + \ No newline at end of file diff --git a/test/test.cpp b/test/test.cpp new file mode 100644 index 0000000..55e9c2e --- /dev/null +++ b/test/test.cpp @@ -0,0 +1,72 @@ +#include +#include +#include + +template +class ContinuousDistTest : public testing::Test +{ +}; + +using ETypes = testing::Types; + +TYPED_TEST_CASE(ContinuousDistTest, ETypes); + +TYPED_TEST(ContinuousDistTest, balanced) +{ + using Matrix = Eigen::Matrix; + Eigen::Rand::Vmt19937_64 gen{ 42 }; + Matrix mat; + + mat = Eigen::Rand::balanced(8, 8, gen); + mat = Eigen::Rand::balanced(3, 3, gen); + mat = Eigen::Rand::balanced(5, 5, gen); + std::cout << mat << std::endl; +} + +TYPED_TEST(ContinuousDistTest, balanced2) +{ + using Matrix = Eigen::Matrix; + Eigen::Rand::Vmt19937_64 gen{ 42 }; + Matrix mat; + + mat = Eigen::Rand::balanced(8, 8, gen, 0.5, 2); + mat = Eigen::Rand::balanced(3, 3, gen, 0.5, 2); + mat = Eigen::Rand::balanced(5, 5, gen, 0.5, 2); + std::cout << mat << std::endl; +} + +TYPED_TEST(ContinuousDistTest, stdNormal) +{ + using Matrix = Eigen::Matrix; + Eigen::Rand::Vmt19937_64 gen{ 42 }; + Matrix mat; + + mat = Eigen::Rand::normal(8, 8, gen); + mat = Eigen::Rand::normal(3, 3, gen); + mat = Eigen::Rand::normal(5, 5, gen); + std::cout << mat << std::endl; +} + +TYPED_TEST(ContinuousDistTest, normal) +{ + using Matrix = Eigen::Matrix; + Eigen::Rand::Vmt19937_64 gen{ 42 }; + Matrix mat; + + mat = Eigen::Rand::normal(8, 8, gen, 1, 2); + mat = Eigen::Rand::normal(3, 3, gen, 1, 2); + mat = Eigen::Rand::normal(5, 5, gen, 1, 2); + std::cout << mat << std::endl; +} + +TYPED_TEST(ContinuousDistTest, exponential) +{ + using Matrix = Eigen::Matrix; + Eigen::Rand::Vmt19937_64 gen{ 42 }; + Matrix mat; + + mat = Eigen::Rand::exponential(8, 8, gen, 2); + mat = Eigen::Rand::exponential(3, 3, gen, 2); + mat = Eigen::Rand::exponential(5, 5, gen, 2); + std::cout << mat << std::endl; +} diff --git a/test/test.vcxproj b/test/test.vcxproj new file mode 100644 index 0000000..8d78d63 --- /dev/null +++ b/test/test.vcxproj @@ -0,0 +1,130 @@ + + + 
+ + Debug + Win32 + + + Release + Win32 + + + Debug + x64 + + + Release + x64 + + + + {643d8602-fe0d-4eaf-841c-e690ee6e53fd} + Win32Proj + 10.0.19041.0 + Application + v142 + Unicode + + + + + + + + + $(SolutionDir);E:\AddInclude;$(VC_IncludePath);$(WindowsSDK_IncludePath); + + + $(SolutionDir);E:\AddInclude;$(VC_IncludePath);$(WindowsSDK_IncludePath); + + + $(SolutionDir);E:\AddInclude;$(VC_IncludePath);$(WindowsSDK_IncludePath); + + + $(SolutionDir);E:\AddInclude;$(VC_IncludePath);$(WindowsSDK_IncludePath); + + + + + + + + + + + + + + + NotUsing + + + Disabled + WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + EnableFastChecks + MultiThreadedDebugDLL + Level3 + + + true + Console + + + + + NotUsing + + + Disabled + X64;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + EnableFastChecks + MultiThreadedDebugDLL + Level3 + + + true + Console + + + + + NotUsing + + + WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + MultiThreadedDLL + Level3 + ProgramDatabase + + + true + Console + true + true + + + + + NotUsing + + + X64;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + MultiThreadedDLL + Level3 + ProgramDatabase + + + true + Console + true + true + + + + + This project references NuGet package(s) that are missing on this computer. Use NuGet Package Restore to download them. For more information, see http://go.microsoft.com/fwlink/?LinkID=322105. The missing file is {0}. + + + + \ No newline at end of file