From 40f4f46bec7543ada513231497365f087a925dd1 Mon Sep 17 00:00:00 2001 From: Anthony Cowley Date: Sun, 30 Apr 2023 15:16:57 -0400 Subject: [PATCH 1/9] Rebuild the nix flake; drop htoml dependency Significantly simplified nix flake setup. Replaced the `htoml` dependency in the spec test with `tomland`. --- CHANGELOG.md | 3 + Frames.cabal | 14 +-- default.nix | 39 ------ flake.lock | 290 ++++----------------------------------------- flake.nix | 186 ++++++----------------------- test/DataCSV.hs | 98 +++++++-------- test/examples.toml | 132 ++++++++++----------- 7 files changed, 184 insertions(+), 578 deletions(-) delete mode 100644 default.nix diff --git a/CHANGELOG.md b/CHANGELOG.md index d3d55b9..7904752 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,6 @@ +# 0.7.4 +Replace the `htoml` package used in a test with `tomland`. + # 0.7.2 - Add `writeCSVopts` that accepts options to specify the CSV delimiter. diff --git a/Frames.cabal b/Frames.cabal index 5791de4..6660fe5 100644 --- a/Frames.cabal +++ b/Frames.cabal @@ -1,5 +1,5 @@ name: Frames -version: 0.7.3 +version: 0.7.4 synopsis: Data frames For working with tabular data files description: User-friendly, type safe, runtime efficient tooling for working with tabular data deserialized from @@ -70,7 +70,7 @@ library text >= 1.1.1.0, template-haskell, transformers, - vector, + vector < 0.13, readable >= 0.3.1, pipes >= 4.1 && < 5, pipes-bytestring >= 2.1.6 && < 2.2, @@ -150,7 +150,7 @@ executable demo pipes hs-source-dirs: demo/framestack/app default-language: Haskell2010 - ghc-options: -O2 + ghc-options: -O2 -fsimpl-tick-factor=200 -- ghc-options: -O2 -fllvm executable tutorial @@ -176,7 +176,7 @@ executable benchdemo hs-source-dirs: benchmarks default-language: Haskell2010 -- ghc-options: -O2 - ghc-options: -O2 -fllvm + ghc-options: -O2 -fllvm -fsimpl-tick-factor=200 -- A demonstration of dealing with missing data. Provided for source -- code and experimentation rather than a useful executable. 
@@ -235,9 +235,9 @@ test-suite spec UncurryFold UncurryFoldNoHeader UncurryFoldPartialData Categorical Chunks Issue145 build-depends: base, text, hspec, Frames, template-haskell, - temporary, directory, htoml, regex-applicative, pretty, - unordered-containers, pipes, HUnit, vinyl, - foldl >= 1.3 && < 1.5, + temporary, directory, tomland, regex-applicative, pretty, + unordered-containers, pipes, HUnit, vinyl, + foldl >= 1.3 && < 1.5, validation-selective, attoparsec, lens, bytestring ghc-options: -threaded -rtsopts -with-rtsopts=-N -Wall default-language: Haskell2010 diff --git a/default.nix b/default.nix deleted file mode 100644 index e7a3d67..0000000 --- a/default.nix +++ /dev/null @@ -1,39 +0,0 @@ -{ mkDerivation, attoparsec, base, bytestring, Chart, Chart-diagrams -, containers, contravariant, criterion, deepseq, diagrams-lib -, diagrams-rasterific, directory, discrimination, foldl, ghc-prim -, hashable, hspec, htoml, http-client, http-client-tls, HUnit, lens -, list-t, microlens, pipes, pipes-bytestring, pipes-group -, pipes-parse, pipes-safe, pretty, primitive, readable -, regex-applicative, statistics, stdenv, template-haskell, temporary -, text, transformers, unordered-containers, vector, vector-th-unbox -, vinyl, zip-archive -}: -mkDerivation { - pname = "Frames"; - version = "0.7.1"; - src = ./.; - configureFlags = [ "-fdemos" ]; - isLibrary = true; - isExecutable = true; - libraryHaskellDepends = [ - base bytestring containers contravariant deepseq discrimination - ghc-prim hashable pipes pipes-bytestring pipes-group pipes-parse - pipes-safe primitive readable template-haskell text transformers - vector vector-th-unbox vinyl - ]; - executableHaskellDepends = [ - base bytestring Chart Chart-diagrams containers diagrams-lib - diagrams-rasterific directory foldl ghc-prim http-client http-client-tls - list-t microlens pipes pipes-safe readable statistics template-haskell - text transformers vector vinyl zip-archive - ]; - testHaskellDepends = [ - attoparsec 
base directory foldl hspec htoml HUnit lens pipes pretty - regex-applicative template-haskell temporary text - unordered-containers vinyl - ]; - benchmarkHaskellDepends = [ base criterion pipes transformers ]; - doBenchmark = true; - description = "Data frames For working with tabular data files"; - license = stdenv.lib.licenses.bsd3; -} diff --git a/flake.lock b/flake.lock index c14e8d6..dfe4004 100644 --- a/flake.lock +++ b/flake.lock @@ -1,70 +1,15 @@ { "nodes": { - "constraints-extras": { - "flake": false, - "locked": { - "narHash": "sha256-WGDSpT37RrHwpQtExGkL5eEmBk/s9b0rxtT9DYqSGg4=", - "type": "tarball", - "url": "https://hackage.haskell.org/package/constraints-extras-0.3.2.1/constraints-extras-0.3.2.1.tar.gz" - }, - "original": { - "type": "tarball", - "url": "https://hackage.haskell.org/package/constraints-extras-0.3.2.1/constraints-extras-0.3.2.1.tar.gz" - } - }, - "flake-compat": { - "flake": false, - "locked": { - "lastModified": 1641205782, - "narHash": "sha256-4jY7RCWUoZ9cKD8co0/4tFARpWB+57+r1bLLvXNJliY=", - "owner": "edolstra", - "repo": "flake-compat", - "rev": "b7547d3eed6f32d06102ead8991ec52ab0a4f1a7", - "type": "github" - }, - "original": { - "owner": "edolstra", - "repo": "flake-compat", - "type": "github" - } - }, "flake-utils": { - "locked": { - "lastModified": 1644229661, - "narHash": "sha256-1YdnJAsNy69bpcjuoKdOYQX0YxZBiCYZo4Twxerqv7k=", - "owner": "numtide", - "repo": "flake-utils", - "rev": "3cecb5b042f7f209c56ffd8371b2711a290ec797", - "type": "github" - }, - "original": { - "owner": "numtide", - "repo": "flake-utils", - "type": "github" - } - }, - "flake-utils_2": { - "locked": { - "lastModified": 1629481132, - "narHash": "sha256-JHgasjPR0/J1J3DRm4KxM4zTyAj4IOJY8vIl75v/kPI=", - "owner": "numtide", - "repo": "flake-utils", - "rev": "997f7efcb746a9c140ce1f13c72263189225f482", - "type": "github" + "inputs": { + "systems": "systems" }, - "original": { - "owner": "numtide", - "repo": "flake-utils", - "type": "github" - } - }, - 
"flake-utils_3": { "locked": { - "lastModified": 1638122382, - "narHash": "sha256-sQzZzAbvKEqN9s0bzWuYmRaA03v40gaJ4+iL1LXjaeI=", + "lastModified": 1681202837, + "narHash": "sha256-H+Rh19JDwRtpVPAWp64F+rlEtxUWBAQW28eAi3SRSzg=", "owner": "numtide", "repo": "flake-utils", - "rev": "74f7e4319258e287b0f9cb95426c9853b282730b", + "rev": "cfacdce06f30d2b68473a46042957675eebb3401", "type": "github" }, "original": { @@ -73,232 +18,41 @@ "type": "github" } }, - "fourmolu": { - "flake": false, - "locked": { - "narHash": "sha256-uo5UE2SzrimnZl+JjJ30Hlg/nIw1OXJTPFIgkQopaI0=", - "type": "tarball", - "url": "https://hackage.haskell.org/package/fourmolu-0.5.0.1/fourmolu-0.5.0.1.tar.gz" - }, - "original": { - "type": "tarball", - "url": "https://hackage.haskell.org/package/fourmolu-0.5.0.1/fourmolu-0.5.0.1.tar.gz" - } - }, - "ghc-exactprint": { - "flake": false, - "locked": { - "narHash": "sha256-8OWLBQj0WYi1f91EE3d5Pq+lTjY+FQei37NEedDtKeo=", - "type": "tarball", - "url": "https://hackage.haskell.org/package/ghc-exactprint-1.4.1/ghc-exactprint-1.4.1.tar.gz" - }, - "original": { - "type": "tarball", - "url": "https://hackage.haskell.org/package/ghc-exactprint-1.4.1/ghc-exactprint-1.4.1.tar.gz" - } - }, - "gitignore": { - "flake": false, - "locked": { - "lastModified": 1611672876, - "narHash": "sha256-qHu3uZ/o9jBHiA3MEKHJ06k7w4heOhA+4HCSIvflRxo=", - "owner": "hercules-ci", - "repo": "gitignore.nix", - "rev": "211907489e9f198594c0eb0ca9256a1949c9d412", - "type": "github" - }, - "original": { - "owner": "hercules-ci", - "repo": "gitignore.nix", - "type": "github" - } - }, - "hie-bios": { - "flake": false, - "locked": { - "narHash": "sha256-nd+FfUQVZNxJfKZkAWA3dF0JwRgXntL+1gGvyNHDbKc=", - "type": "tarball", - "url": "https://hackage.haskell.org/package/hie-bios-0.9.0/hie-bios-0.9.0.tar.gz" - }, - "original": { - "type": "tarball", - "url": "https://hackage.haskell.org/package/hie-bios-0.9.0/hie-bios-0.9.0.tar.gz" - } - }, - "hlint": { - "flake": false, - "locked": { - "narHash": 
"sha256-Kz6adx97kY7ojoDlw3y0R6LQ0h/EtXGR5+N07/b6uGk=", - "type": "tarball", - "url": "https://hackage.haskell.org/package/hlint-3.3.6/hlint-3.3.6.tar.gz" - }, - "original": { - "type": "tarball", - "url": "https://hackage.haskell.org/package/hlint-3.3.6/hlint-3.3.6.tar.gz" - } - }, - "hls": { - "inputs": { - "constraints-extras": "constraints-extras", - "flake-compat": "flake-compat", - "flake-utils": "flake-utils_2", - "fourmolu": "fourmolu", - "ghc-exactprint": "ghc-exactprint", - "gitignore": "gitignore", - "hie-bios": "hie-bios", - "hlint": "hlint", - "implicit-hie-cradle": "implicit-hie-cradle", - "lsp": "lsp", - "lsp-test": "lsp-test", - "lsp-types": "lsp-types", - "nixpkgs": "nixpkgs", - "pre-commit-hooks": "pre-commit-hooks", - "retrie": "retrie" - }, - "locked": { - "lastModified": 1646601891, - "narHash": "sha256-JjkrVZsRSNr7UtoRiX3BMAX4hlXo9y7Ji902IqWcw3o=", - "owner": "haskell", - "repo": "haskell-language-server", - "rev": "73fdd91e5a5a6a7cabdafb453f23749693029cc5", - "type": "github" - }, - "original": { - "owner": "haskell", - "repo": "haskell-language-server", - "type": "github" - } - }, - "implicit-hie-cradle": { - "flake": false, - "locked": { - "narHash": "sha256-2NmucBBI7Qi1UGXWG27XFZRCeqeRiwVFWmJKZnp6R5U=", - "type": "tarball", - "url": "https://hackage.haskell.org/package/implicit-hie-cradle-0.3.0.5/implicit-hie-cradle-0.3.0.5.tar.gz" - }, - "original": { - "type": "tarball", - "url": "https://hackage.haskell.org/package/implicit-hie-cradle-0.3.0.5/implicit-hie-cradle-0.3.0.5.tar.gz" - } - }, - "lsp": { - "flake": false, - "locked": { - "narHash": "sha256-OcyNHNRh9j5nbJ8SjaNAWIEKuixAJlA7+vTimFY0c2c=", - "type": "tarball", - "url": "https://hackage.haskell.org/package/lsp-1.4.0.0/lsp-1.4.0.0.tar.gz" - }, - "original": { - "type": "tarball", - "url": "https://hackage.haskell.org/package/lsp-1.4.0.0/lsp-1.4.0.0.tar.gz" - } - }, - "lsp-test": { - "flake": false, - "locked": { - "narHash": "sha256-IOmbQH6tKdu9kAyirvLx6xFS2N+/tbs6vZn0mNGm3No=", - 
"type": "tarball", - "url": "https://hackage.haskell.org/package/lsp-test-0.14.0.2/lsp-test-0.14.0.2.tar.gz" - }, - "original": { - "type": "tarball", - "url": "https://hackage.haskell.org/package/lsp-test-0.14.0.2/lsp-test-0.14.0.2.tar.gz" - } - }, - "lsp-types": { - "flake": false, - "locked": { - "narHash": "sha256-HGg4upgirM6/px+vflY5S0Y79gAIDpl32Ad9mbbzTdU=", - "type": "tarball", - "url": "https://hackage.haskell.org/package/lsp-types-1.4.0.1/lsp-types-1.4.0.1.tar.gz" - }, - "original": { - "type": "tarball", - "url": "https://hackage.haskell.org/package/lsp-types-1.4.0.1/lsp-types-1.4.0.1.tar.gz" - } - }, "nixpkgs": { "locked": { - "lastModified": 1639357775, - "narHash": "sha256-mJJFCPqZi1ZO3CvgEfN2nFAYv4uAJSRnTKzLFi61+WA=", - "owner": "NixOS", - "repo": "nixpkgs", - "rev": "c473cc8714710179df205b153f4e9fa007107ff9", - "type": "github" - }, - "original": { - "owner": "NixOS", - "ref": "nixpkgs-unstable", - "repo": "nixpkgs", - "type": "github" - } - }, - "nixpkgs_2": { - "locked": { - "lastModified": 1641016545, - "narHash": "sha256-JMNwvnBzG0RjGG3eH27Y5/GlJ9ryeCdGJfqGbqxnmZY=", - "owner": "NixOS", - "repo": "nixpkgs", - "rev": "6f05cfdb1e78d36c0337516df674560e4b51c79b", - "type": "github" - }, - "original": { - "id": "nixpkgs", - "type": "indirect" - } - }, - "nixpkgs_3": { - "locked": { - "lastModified": 1646254136, - "narHash": "sha256-8nQx02tTzgYO21BP/dy5BCRopE8OwE8Drsw98j+Qoaw=", + "lastModified": 1682779028, + "narHash": "sha256-tFfSbwSLobpHRznAa35KEU3R+fsFWTlmpFhTUdXq8RE=", "owner": "nixos", "repo": "nixpkgs", - "rev": "3e072546ea98db00c2364b81491b893673267827", + "rev": "54abe781c482f51ff4ff534ebaba77db5bd97442", "type": "github" }, "original": { "owner": "nixos", - "ref": "nixos-unstable", + "ref": "nixpkgs-unstable", "repo": "nixpkgs", "type": "github" } }, - "pre-commit-hooks": { + "root": { "inputs": { - "flake-utils": "flake-utils_3", - "nixpkgs": "nixpkgs_2" - }, - "locked": { - "lastModified": 1624971177, - "narHash": 
"sha256-Amf/nBj1E77RmbSSmV+hg6YOpR+rddCbbVgo5C7BS0I=", - "owner": "cachix", - "repo": "pre-commit-hooks.nix", - "rev": "397f0713d007250a2c7a745e555fa16c5dc8cadb", - "type": "github" - }, - "original": { - "owner": "cachix", - "repo": "pre-commit-hooks.nix", - "type": "github" + "flake-utils": "flake-utils", + "nixpkgs": "nixpkgs" } }, - "retrie": { - "flake": false, + "systems": { "locked": { - "narHash": "sha256-SrUyFea9Qr2SYeNVDJfWZfCguJV2rHK2mO/FO4xLFaY=", - "type": "tarball", - "url": "https://hackage.haskell.org/package/retrie-1.2.0.1/retrie-1.2.0.1.tar.gz" + "lastModified": 1681028828, + "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=", + "owner": "nix-systems", + "repo": "default", + "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e", + "type": "github" }, "original": { - "type": "tarball", - "url": "https://hackage.haskell.org/package/retrie-1.2.0.1/retrie-1.2.0.1.tar.gz" - } - }, - "root": { - "inputs": { - "flake-utils": "flake-utils", - "hls": "hls", - "nixpkgs": "nixpkgs_3" + "owner": "nix-systems", + "repo": "default", + "type": "github" } } }, diff --git a/flake.nix b/flake.nix index 41ec2e1..c67a178 100644 --- a/flake.nix +++ b/flake.nix @@ -1,162 +1,48 @@ { description = "Data frames for tabular data."; - nixConfig = { - extra-substituters = [ - "https://haskell-language-server.cachix.org" - ]; - extra-trusted-public-keys = [ - "haskell-language-server.cachix.org-1:juFfHrwkOxqIOZShtC4YC1uT1bBcq2RSvC7OMKx0Nz8=" - ]; - }; - inputs = { - nixpkgs.url = "github:nixos/nixpkgs/nixos-unstable"; flake-utils.url = "github:numtide/flake-utils"; - hls.url = "github:haskell/haskell-language-server"; + nixpkgs.url = "github:nixos/nixpkgs/nixpkgs-unstable"; }; - outputs = { self, nixpkgs, hls, flake-utils}: - - flake-utils.lib.eachDefaultSystem (system: let - - pkgs = import nixpkgs { - inherit system; - overlays = [ self.overlay ]; - }; - - compilerVersionFromHsPkgs = hsPkgs: - pkgs.lib.replaceStrings [ "." 
] [ "" ] hsPkgs.ghc.version; - - hspkgs810 = pkgs.haskell.packages."ghc8107".override { - overrides = pkgs.frameHaskellOverlay-8107; - }; - hspkgs92 = pkgs.haskell.packages."ghc921".override { - overrides = pkgs.frameHaskellOverlay-921; - }; - - mkPackage = hspkgs: - hspkgs.developPackage { - root = pkgs.lib.cleanSource ./.; - name = "Frames"; - returnShellEnv = false; - withHoogle = true; + outputs = { self, nixpkgs, flake-utils }: + flake-utils.lib.eachDefaultSystem (system: + let pkg-name = "Frames"; + pkgs = import nixpkgs { + inherit system; + config = { allowBroken = true; }; }; - - mkShell = hspkgs: - let - compilerVersion = compilerVersionFromHsPkgs hspkgs; - myModifier = drv: - pkgs.haskell.lib.addBuildTools drv (with hspkgs; [ - cabal-install - ghcid - hls.packages.${system}."haskell-language-server-${compilerVersion}" - hasktags - ]); - in - (myModifier (mkPackage hspkgs)).envFunc {}; - - mkSimpleShell = compilerVersion: - let - compiler = pkgs.haskell.compiler."ghc${compilerVersion}"; - in - pkgs.mkShell { - buildInputs = [ - pkgs.haskell.compiler."ghc${compilerVersion}" - pkgs.haskell.packages."ghc${compilerVersion}".cabal-install - pkgs.llvmPackages_latest.llvm - ] ++ - pkgs.lib.optional (compilerVersion != "921") - hls.packages.${system}."haskell-language-server-${compilerVersion}"; - }; - in { - packages = { - Frames-8107 = mkPackage hspkgs810; - Frames-921 = mkPackage hspkgs92; - }; - - devShell = mkSimpleShell "921"; - - devShells = { - Frames-8107 = mkShell hspkgs810; - Frames-921 = mkShell hspkgs92; - }; - }) // { - - overlay = final: prev: { - frameHaskellOverlay-921 = hfinal: hprev: ( - (final.frameHaskellOverlay-8107 hfinal hprev) // ( - let doJailbreak = prev.haskell.lib.doJailbreak; - overrideSrc = prev.haskell.lib.overrideSrc; - dontHaddock = prev.haskell.lib.dontHaddock; - dontCheck = prev.haskell.lib.dontCheck; - in { - # Temporary fixes for breakage with ghc-9.2.1 - attoparsec = dontCheck hprev.attoparsec; - base-compat-batteries = 
dontCheck hprev.base-compat-batteries; - basement = dontHaddock hprev.basement; - blaze-builder = dontCheck hprev.blaze-builder; - blaze-markup = dontCheck hprev.blaze-markup; - case-insensitive = dontCheck hprev.case-insensitive; - cassava = dontCheck hprev.cassava; - conduit-extra = dontCheck hprev.conduit-extra; - criterion = dontCheck hprev.criterion; - cryptonite = dontHaddock hprev.cryptonite; - fast-logger = dontCheck hprev.fast-logger; - htoml = dontCheck hprev.htoml; - lens-family-core = dontHaddock hprev.lens-family-core; - ListLike = dontCheck hprev.ListLike; - microlens = dontHaddock hprev.microlens; - microstache = dontCheck hprev.microstache; - readable = dontHaddock (doJailbreak hprev.readable); - QuickCheck = dontCheck hprev.QuickCheck; - operational = dontHaddock hprev.operational; - optparse-applicative = dontCheck hprev.optparse-applicative; - generic-deriving = dontHaddock hprev.generic-deriving; - streaming-commons = dontCheck hprev.streaming-commons; - utf8-string = dontCheck hprev.utf8-string; - word8 = dontCheck hprev.word8; - })); - - frameHaskellOverlay-8107 = hfinal: hprev: - let doJailbreak = prev.haskell.lib.doJailbreak; - overrideSrc = prev.haskell.lib.overrideSrc; - dontHaddock = prev.haskell.lib.dontHaddock; - dontCheck = prev.haskell.lib.dontCheck; - in { - statestack = doJailbreak hprev.statestack; - svg-builder = doJailbreak hprev.svg-builder; - # see https://github.com/JonasDuregard/sized-functors/pull/10 - # https://github.com/ - size-based = doJailbreak (overrideSrc hprev.size-based { - version = "unstable-2022-01-20"; - src = final.fetchzip { - url = "https://github.com/byorgey/sized-functors/archive/master.tar.gz"; - sha256 = "sha256-pVJbEGF4/lvXmWIypwkMQBYygOx3TQwLJbMpfdYovdY="; - }; - }); - monoid-extras = doJailbreak hprev.monoid-extras; - active = doJailbreak hprev.active; - dual-tree = doJailbreak hprev.dual-tree; - diagrams-core = doJailbreak hprev.diagrams-core; - diagrams-lib = doJailbreak hprev.diagrams-lib; - 
diagrams-postscript = doJailbreak hprev.diagrams-postscript; - SVGFonts = doJailbreak hprev.SVGFonts; - diagrams-svg = doJailbreak hprev.diagrams-svg; - diagrams-rasterific = doJailbreak hprev.diagrams-rasterific; - Chart = doJailbreak hprev.Chart; - linear = hprev.callHackage "linear" "1.21.8" {}; - vinyl = overrideSrc hprev.vinyl { - version = "0.14.1"; - src = prev.fetchFromGitHub { - owner = "VinylRecords"; - repo = "Vinyl"; - rev = "892d597f9dd8e96c0853269ab78141ae2e03aa2c"; - hash = "sha256-ONw+8D1r4xX9+KgYOFpTNhk+pCsNZW8DbbAzOheSkS0="; + haskell = pkgs.haskellPackages; + haskell-overlay = final: prev: + let overrideSrc = pkgs.haskell.lib.overrideSrc; + in { + ${pkg-name} = hspkgs.callCabal2nix pkg-name ./. {}; + # Add here any package overrides you may need }; + hspkgs = haskell.override { + overrides = haskell-overlay; }; - readable = doJailbreak hprev.readable; + in { + packages = pkgs; + apps.init = pkgs.writeShellApplication { + name = "cabal-init"; + runtimeInputs = [hspkgs.ghc hspkgs.cabal-install]; + text = '' + cabal init -p ${pkg-name} + ''; }; - }; - }; + inherit haskell-overlay; + defaultPackage = hspkgs.${pkg-name}; + devShell = hspkgs.shellFor { + packages = p: [p.${pkg-name}]; + root = ./.; + withHoogle = true; + buildInputs = with hspkgs; [ + haskell-language-server + cabal-install + ]; + }; + } + ); } diff --git a/test/DataCSV.hs b/test/DataCSV.hs index c8b7fab..8f50106 100644 --- a/test/DataCSV.hs +++ b/test/DataCSV.hs @@ -1,56 +1,58 @@ -{-# LANGUAGE DeriveLift, OverloadedStrings, TemplateHaskell #-} +{-# LANGUAGE DeriveLift #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE TupleSections #-} + module DataCSV where -import Control.Monad ((>=>)) + import Data.Bifunctor (first) -import qualified Data.ByteString as BS -import qualified Data.HashMap.Lazy as H -import Data.Maybe (catMaybes) +import qualified Data.Foldable as F import qualified Data.Text as T import qualified Data.Text.IO as T -import 
Data.Text.Encoding (decodeUtf8) -import Language.Haskell.TH.Syntax (Lift(..)) -import Text.Toml -import Text.Toml.Types (Node (VTable, VString), Table) +import Language.Haskell.TH.Syntax (Lift (..)) +import qualified Toml +import Validation -data CsvExample = CsvExample { - name :: String - , csv :: String - , generated :: String } - deriving Lift +data CsvExample = CsvExample + { name :: String + , csv :: String + , generated :: String + } + deriving (Lift, Show) --- instance Lift CsvExample where --- lift (CsvExample n c g) = [e| CsvExample n c g |] +keyText :: Toml.Key -> T.Text +keyText = F.fold . cleanup . map Toml.unPiece . F.toList . Toml.unKey + where + -- Top-level table names are parsed as @"name" :| + -- ["name"]@. Remove that duplication here. + cleanup [x, y] | x == y = [x] + cleanup x = x -examplesFrom :: FilePath -> IO [CsvExample] -examplesFrom fp = - (either error id - . ((first show . parseTomlDoc "examples") >=> go) - . decodeUtf8) - <$> BS.readFile fp - where go :: Table -> Either String [CsvExample] - go = fmap catMaybes . mapM (uncurry ex . first T.unpack) . H.toList - ex :: String -> Node -> Either String (Maybe CsvExample) - ex k (VTable v) = - do c <- case H.lookup "csv" v of - Nothing -> Right Nothing -- ("No csv key in "++k) - Just (VString c) -> Right (Just (T.unpack c)) - Just _ -> Left ("csv key not a string in " ++ k) - g <- case H.lookup "generated" v of - Nothing -> Left ("No generated key in " ++ k) - Just (VString g) -> Right (Just (T.unpack g)) - Just _ -> Left ("generated key not a string in " ++ k) - return (CsvExample k <$> c <*> g) - ex k _ = Left (k ++ " is not a table") +-- | Parse a TOML file that is a top-level table whose values are all +-- the same type. The @tomland@ codec API is centered around starting +-- with a key, but a top-level table does not have a key, so we must +-- use the lower level 'Toml.parse' and 'Toml.tomlTables' before +-- repeatedly applying the provided 'Toml.TomlCodec'. +parseFileOf :: forall a. 
Toml.TomlCodec a -> T.Text -> Either [T.Text] [(T.Text, a)] +parseFileOf codec = + first (map Toml.prettyTomlDecodeError) + . validationToEither + . traverse (uncurry go) + . Toml.toList + . Toml.tomlTables + . either (error . show) id + . Toml.parse + where + go :: Toml.Key -> Toml.TOML -> Validation [Toml.TomlDecodeError] (T.Text, a) + go k v = (keyText k,) <$> Toml.runTomlCodec codec v -generatedFrom :: FilePath -> String -> IO String -generatedFrom fp key = (either error id . (>>= go) - . first show . parseTomlDoc "examples") - <$> T.readFile fp - where go :: Table -> Either String String - go toml = do tbl <- case H.lookup (T.pack key) toml of - Just (VTable t) -> Right t - _ -> Left (key ++ " is not a table") - case H.lookup "generated" tbl of - Just (VString g) -> Right (T.unpack g) - Just _ -> Left ("generated key not a string in " ++ key) - Nothing -> Left ("No generated key in " ++ key) +parseExamples :: FilePath -> IO (Either [T.Text] [CsvExample]) +parseExamples = fmap (fmap (map mkExample) . parseFileOf exampleCodec) . T.readFile + where + exampleCodec = Toml.pair (Toml.string "csv") (Toml.string "generated") + mkExample (name', (csv', generated')) = + CsvExample (T.unpack name') csv' generated' + +-- | Wraps 'parseExamples' to call 'error' on any parse errors. +examplesFrom :: FilePath -> IO [CsvExample] +examplesFrom = fmap (either (error . show) id) . parseExamples diff --git a/test/examples.toml b/test/examples.toml index 9e90e23..e4bfaac 100644 --- a/test/examples.toml +++ b/test/examples.toml @@ -102,72 +102,72 @@ managerId' :: forall f_22 g_23 rs_24 . (Functor f_22, Rec g_23 rs_24 -> f_22 (Rec g_23 rs_24) managerId' = rlens' @ManagerId""" -[managers_employees] -generated = """ -type ManagerRec = Record [Id, Manager, Age, Pay] -managerRecParser :: ParserOptions -managerRecParser = ParserOptions Nothing (T.pack ",") (Frames.CSV.RFC4180Quoting '"') - -type Id = "id" :-> Int -id :: forall f_0 rs_1 . 
(Functor f_0, Id ∈ rs_1) => - (Int -> f_0 Bool) -> Record rs_1 -> f_0 (Record rs_1) -id = rlens @Id . rfield -id' :: forall f_2 g_3 rs_4 . (Functor f_2, Functor g_3, Id ∈ rs_4) => - (g_3 Id -> f_2 (g_3 Id)) -> Rec g_3 rs_4 -> f_2 (Rec g_3 rs_4) -id' = rlens' @Id - -type Manager = "manager" :-> Text -manager :: forall f_5 rs_6 . (Functor f_5, Manager ∈ rs_6) => - (Text -> f_5 Text) -> Record rs_6 -> f_5 (Record rs_6) -manager = rlens @Manager . rfield -manager' :: forall f_7 g_8 rs_9 . (Functor f_7, - Manager ∈ rs_9) => - (g_8 Manager -> f_7 (g_8 Manager)) -> Rec g_8 rs_9 -> f_7 (Rec g_8 rs_9) -manager' = rlens' @Manager - -type Age = "age" :-> Int -age :: forall f_10 rs_11 . (Functor f_10, Age ∈ rs_11) => - (Int -> f_10 Int) -> Record rs_11 -> f_10 (Record rs_11) -age = rlens @Age . rfield -age' :: forall f_12 g_13 rs_14 . (Functor f_12, - Age ∈ rs_14) => - (g_13 Age -> f_12 (g_13 Age)) -> Rec g_13 rs_14 -> f_12 (Rec g_13 rs_14) -age' = rlens' @Age - -type Pay = "pay" :-> Double -pay :: forall f_15 rs_16 . (Functor f_15, Pay ∈ rs_16) => - (Double -> f_15 Double) -> Record rs_16 -> f_15 (Record rs_16) -pay = rlens @Pay . rfield -pay' :: forall f_17 g_18 rs_19 . (Functor f_17, - Pay ∈ rs_19) => - (g_18 Pay -> f_17 (g_18 Pay)) -> Rec g_18 rs_19 -> f_17 (Rec g_18 rs_19) -pay' = rlens' @Pay - -type EmployeeRec = Record ["id" :-> Int, "employee" :-> Text, "age" :-> Int, "pay" :-> Double, "manager_id" :-> Int] -employeeRecParser :: ParserOptions -employeeRecParser = ParserOptions Nothing (T.pack ",") (Frames.CSV.RFC4180Quoting '"') - -type Employee = "employee" :-> Text -employee :: forall f_5 rs_6 . (Functor f_5, Employee ∈ rs_6) => - (Text -> f_5 Text) -> Record rs_6 -> f_5 (Record rs_6) -employee = rlens @Employee . rfield -employee' :: forall f_7 g_8 rs_9 . 
(Functor f_7, - Employee ∈ rs_9) => - (g_8 Employee -> f_7 (g_8 Employee)) -> Rec g_8 rs_9 -> f_7 (Rec g_8 rs_9) -employee' = rlens' @Employee - -type Age = "age" :-> Int - -type ManagerId = "manager_id" :-> Int -managerId :: forall f_20 rs_21 . (Functor f_20, ManagerId ∈ rs_21) => - (Int -> f_20 Int) -> Record rs_21 -> f_20 (Record rs_21) -managerId = rlens @ManagerId . rfield -managerId' :: forall f_22 g_23 rs_24 . (Functor f_22, - ManagerId ∈ rs_24) => - (g_23 ManagerId -> f_22 (g_23 ManagerId)) -> - Rec g_23 rs_24 -> f_22 (Rec g_23 rs_24) -managerId' = rlens' @ManagerId -""" +# [managers_employees] +# generated = """ +# type ManagerRec = Record [Id, Manager, Age, Pay] +# managerRecParser :: ParserOptions +# managerRecParser = ParserOptions Nothing (T.pack ",") (Frames.CSV.RFC4180Quoting '"') + +# type Id = "id" :-> Int +# id :: forall f_0 rs_1 . (Functor f_0, Id ∈ rs_1) => +# (Int -> f_0 Bool) -> Record rs_1 -> f_0 (Record rs_1) +# id = rlens @Id . rfield +# id' :: forall f_2 g_3 rs_4 . (Functor f_2, Functor g_3, Id ∈ rs_4) => +# (g_3 Id -> f_2 (g_3 Id)) -> Rec g_3 rs_4 -> f_2 (Rec g_3 rs_4) +# id' = rlens' @Id + +# type Manager = "manager" :-> Text +# manager :: forall f_5 rs_6 . (Functor f_5, Manager ∈ rs_6) => +# (Text -> f_5 Text) -> Record rs_6 -> f_5 (Record rs_6) +# manager = rlens @Manager . rfield +# manager' :: forall f_7 g_8 rs_9 . (Functor f_7, +# Manager ∈ rs_9) => +# (g_8 Manager -> f_7 (g_8 Manager)) -> Rec g_8 rs_9 -> f_7 (Rec g_8 rs_9) +# manager' = rlens' @Manager + +# type Age = "age" :-> Int +# age :: forall f_10 rs_11 . (Functor f_10, Age ∈ rs_11) => +# (Int -> f_10 Int) -> Record rs_11 -> f_10 (Record rs_11) +# age = rlens @Age . rfield +# age' :: forall f_12 g_13 rs_14 . (Functor f_12, +# Age ∈ rs_14) => +# (g_13 Age -> f_12 (g_13 Age)) -> Rec g_13 rs_14 -> f_12 (Rec g_13 rs_14) +# age' = rlens' @Age + +# type Pay = "pay" :-> Double +# pay :: forall f_15 rs_16 . 
(Functor f_15, Pay ∈ rs_16) => +# (Double -> f_15 Double) -> Record rs_16 -> f_15 (Record rs_16) +# pay = rlens @Pay . rfield +# pay' :: forall f_17 g_18 rs_19 . (Functor f_17, +# Pay ∈ rs_19) => +# (g_18 Pay -> f_17 (g_18 Pay)) -> Rec g_18 rs_19 -> f_17 (Rec g_18 rs_19) +# pay' = rlens' @Pay + +# type EmployeeRec = Record ["id" :-> Int, "employee" :-> Text, "age" :-> Int, "pay" :-> Double, "manager_id" :-> Int] +# employeeRecParser :: ParserOptions +# employeeRecParser = ParserOptions Nothing (T.pack ",") (Frames.CSV.RFC4180Quoting '"') + +# type Employee = "employee" :-> Text +# employee :: forall f_5 rs_6 . (Functor f_5, Employee ∈ rs_6) => +# (Text -> f_5 Text) -> Record rs_6 -> f_5 (Record rs_6) +# employee = rlens @Employee . rfield +# employee' :: forall f_7 g_8 rs_9 . (Functor f_7, +# Employee ∈ rs_9) => +# (g_8 Employee -> f_7 (g_8 Employee)) -> Rec g_8 rs_9 -> f_7 (Rec g_8 rs_9) +# employee' = rlens' @Employee + +# type Age = "age" :-> Int + +# type ManagerId = "manager_id" :-> Int +# managerId :: forall f_20 rs_21 . (Functor f_20, ManagerId ∈ rs_21) => +# (Int -> f_20 Int) -> Record rs_21 -> f_20 (Record rs_21) +# managerId = rlens @ManagerId . rfield +# managerId' :: forall f_22 g_23 rs_24 . 
(Functor f_22, +# ManagerId ∈ rs_24) => +# (g_23 ManagerId -> f_22 (g_23 ManagerId)) -> +# Rec g_23 rs_24 -> f_22 (Rec g_23 rs_24) +# managerId' = rlens' @ManagerId +# """ [double_gt_bool] csv = """ From 77f17a588e3d66e067f2c93796426a55128107d5 Mon Sep 17 00:00:00 2001 From: Anthony Cowley Date: Sat, 21 Oct 2023 23:18:56 -0400 Subject: [PATCH 2/9] Relax dependencies; test with ghc-9.4.6 --- Frames.cabal | 10 +++++----- cabal.project | 6 ------ 2 files changed, 5 insertions(+), 11 deletions(-) delete mode 100644 cabal.project diff --git a/Frames.cabal b/Frames.cabal index 6660fe5..cfb21f0 100644 --- a/Frames.cabal +++ b/Frames.cabal @@ -30,7 +30,7 @@ extra-source-files: benchmarks/*.hs benchmarks/*.py data/left1.csv data/right1.csv data/left_summary.csv data/FL2.csv cabal-version: >=1.10 -tested-with: GHC == 8.6.5 || == 8.8.4 || == 8.10.7 || == 9.0.1 || == 9.2.1 +tested-with: GHC == 8.6.5 || == 8.8.4 || == 8.10.7 || == 9.0.1 || == 9.2.1 || == 9.4.6 source-repository head type: git @@ -64,13 +64,13 @@ library TypeOperators, ConstraintKinds, StandaloneDeriving, UndecidableInstances, ScopedTypeVariables, OverloadedStrings, TypeApplications - build-depends: base >=4.10 && <4.17, - ghc-prim >=0.3 && <0.9, - primitive >= 0.6 && < 0.8, + build-depends: base >=4.10 && <4.20, + ghc-prim >=0.3 && <0.10, + primitive >= 0.6 && < 0.9, text >= 1.1.1.0, template-haskell, transformers, - vector < 0.13, + vector < 0.14, readable >= 0.3.1, pipes >= 4.1 && < 5, pipes-bytestring >= 2.1.6 && < 2.2, diff --git a/cabal.project b/cabal.project deleted file mode 100644 index dc4219f..0000000 --- a/cabal.project +++ /dev/null @@ -1,6 +0,0 @@ -packages: . 
- -source-repository-package - type: git - location: https://github.com/mirokuratczyk/htoml - tag: 33971287445c5e2531d9605a287486dfc3cbe1da \ No newline at end of file From 9052da4c82327bc24da82c6a4637ea139ded67e0 Mon Sep 17 00:00:00 2001 From: Anthony Cowley Date: Sat, 21 Oct 2023 23:19:57 -0400 Subject: [PATCH 3/9] Time parsing demonstration --- Frames.cabal | 11 +++++++++++ demo/TimeZones/src/Columns.hs | 2 ++ demo/TimeZones/src/Main.hs | 15 +++++++++++---- demo/TimeZones/src/TimeIn.hs | 8 ++++++-- 4 files changed, 30 insertions(+), 6 deletions(-) diff --git a/Frames.cabal b/Frames.cabal index cfb21f0..1d881c4 100644 --- a/Frames.cabal +++ b/Frames.cabal @@ -227,6 +227,17 @@ executable modcsv hs-source-dirs: demo default-language: Haskell2010 +executable timezones + if !flag(demos) + buildable: False + main-is: Main.hs + other-modules: TimeIn Columns + if flag(demos) + build-depends: base, Frames, tz, text, time, readable, + pipes, pipes-safe, template-haskell + hs-source-dirs: demo/TimeZones/src + default-language: Haskell2010 + test-suite spec type: exitcode-stdio-1.0 hs-source-dirs: test diff --git a/demo/TimeZones/src/Columns.hs b/demo/TimeZones/src/Columns.hs index 59971b4..6c8189c 100644 --- a/demo/TimeZones/src/Columns.hs +++ b/demo/TimeZones/src/Columns.hs @@ -9,8 +9,10 @@ -- to parse data captured as 'Data.Time.LocalTime.LocalTime' values -- into the \"America/Chicago\" time zone. 
module Columns (MyColumns, TimeIn(..), Chicago(..)) where +import Data.Proxy (Proxy(..)) import Frames (CommonColumns) import Frames.ColumnTypeable (Parseable(..)) +import Frames.CSV (defaultSep, produceTokens) import TimeIn -- | Define a 'Parseable' instance for @TimeIn "America/Chicago"@ diff --git a/demo/TimeZones/src/Main.hs b/demo/TimeZones/src/Main.hs index 71dcee6..91b463e 100644 --- a/demo/TimeZones/src/Main.hs +++ b/demo/TimeZones/src/Main.hs @@ -1,19 +1,26 @@ {-# LANGUAGE DataKinds #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE TemplateHaskell #-} +{-# LANGUAGE TypeApplications #-} -- | Parse data including dates times in some implicit local time zone -- into an absolute time using a supplied time zone. module Main where +import Data.Proxy (Proxy(Proxy)) import Frames import Frames.CSV +import Frames.TH (RowGen(columnUniverse), colQ, rowGen) import Columns import Pipes (Producer, (>->), runEffect) import qualified Pipes.Prelude as P +import Pipes.Safe +import Frames (ColumnUniverse) +import Columns (MyColumns) -tableTypes' rowGen { columnUniverse = $(colQ ''MyColumns) } "users.csv" +-- tableTypes' rowGen { columnUniverse = $(colQ ''MyColumns) } "/Users/acowley/Projects/Frames/demo/TimeZones/users.csv" +tableTypes' ((rowGen "demo/TimeZones/users.csv") { columnUniverse = Proxy @MyColumns }) -loadUsers :: Producer Row IO () -loadUsers = readTable "users.csv" +loadUsers :: Producer Row (SafeT IO) () +loadUsers = readTable "demo/TimeZones/users.csv" main :: IO () -main = runEffect $ loadUsers >-> P.print +main = runSafeEffect $ loadUsers >-> P.print diff --git a/demo/TimeZones/src/TimeIn.hs b/demo/TimeZones/src/TimeIn.hs index 0674912..2bba16c 100644 --- a/demo/TimeZones/src/TimeIn.hs +++ b/demo/TimeZones/src/TimeIn.hs @@ -2,10 +2,11 @@ {-# LANGUAGE KindSignatures #-} {-# LANGUAGE QuasiQuotes #-} {-# LANGUAGE TemplateHaskell #-} +{-# LANGUAGE RankNTypes #-} -- | Define the 'TimeIn' type that lets us specify in the type how a -- 'LocalTime' should be 
converted to a 'UTCTime'. module TimeIn where -import Control.Monad (MonadPlus, msum) +import Control.Monad (MonadPlus (mzero), msum) import qualified Data.Text as T import Data.Time.Clock import Data.Time.Format @@ -20,11 +21,14 @@ import Language.Haskell.TH -- whence it came. newtype TimeIn (zone :: Symbol) = TimeIn UTCTime deriving Show +failZero :: MonadPlus m => Maybe r -> m r +failZero = maybe mzero pure + -- | Try to parse a 'LocalTime' value using common formats. parseLocalTime :: MonadPlus m => T.Text -> m LocalTime parseLocalTime t = msum (map (($ T.unpack t) . mkParser) formats) where formats = ["%F %T", "%F"] - mkParser = parseTimeM True defaultTimeLocale + mkParser = (failZero .) . parseTimeM True defaultTimeLocale -- | @zonedTime "America/Chicago"@ will create a 'Parseable' instance -- for the type @TimeIn "America/Chicago"@. You can then use this type From acaf2e9a3d13b3bbaab8bc49981f0afc1aaf0ed7 Mon Sep 17 00:00:00 2001 From: Anthony Cowley Date: Sat, 21 Oct 2023 23:24:37 -0400 Subject: [PATCH 4/9] Formatting --- src/Frames/CSV.hs | 536 +++++++++++++++++++++-------------- src/Frames/ColumnTypeable.hs | 3 +- src/Frames/ColumnUniverse.hs | 273 +++++++++++------- 3 files changed, 493 insertions(+), 319 deletions(-) diff --git a/src/Frames/CSV.hs b/src/Frames/CSV.hs index 147a12a..f565df1 100644 --- a/src/Frames/CSV.hs +++ b/src/Frames/CSV.hs @@ -1,19 +1,30 @@ +{-# LANGUAGE CPP #-} +{-# LANGUAGE DataKinds #-} +{-# LANGUAGE DeriveLift #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE GADTs #-} {-# LANGUAGE InstanceSigs #-} -{-# LANGUAGE CPP, DataKinds, DeriveLift, FlexibleContexts, FlexibleInstances, GADTs, - LambdaCase, OverloadedStrings, RankNTypes, - ScopedTypeVariables, TemplateHaskell, TypeApplications, - TypeOperators #-} +{-# LANGUAGE LambdaCase #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE RankNTypes #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE TemplateHaskell #-} +{-# LANGUAGE 
TypeApplications #-} +{-# LANGUAGE TypeOperators #-} + -- | Infer row types from comma-separated values (CSV) data and read -- that data from files. Template Haskell is used to generate the -- necessary types so that you can write type safe programs referring -- to those types. module Frames.CSV where -import Control.Exception (try, IOException) -import Control.Monad (when, unless) + +import Control.Exception (IOException, try) +import Control.Monad (unless, when) import qualified Data.ByteString.Char8 as B8 import qualified Data.Foldable as F import Data.List (intercalate) -import Data.Maybe (isNothing, fromMaybe) +import Data.Maybe (fromMaybe, isNothing) #if __GLASGOW_HASKELL__ < 808 import Data.Monoid ((<>)) #endif @@ -21,10 +32,9 @@ import Data.Proxy import qualified Data.Text as T import qualified Data.Text.Encoding as T import qualified Data.Text.IO as T -import Data.Vinyl (recordToList, Rec(..), ElField(..), RecordToList) -import Data.Vinyl (RecMapMethod, rmapMethod, RMap, rmap) +import Data.Vinyl (ElField (..), RMap, Rec (..), RecMapMethod, RecordToList, recordToList, rmap, rmapMethod) import Data.Vinyl.Class.Method (PayloadType) -import Data.Vinyl.Functor (Const(..), (:.), Compose(..)) +import Data.Vinyl.Functor (Compose (..), Const (..), (:.)) import Frames.Col import Frames.ColumnTypeable import Frames.Rec @@ -35,11 +45,11 @@ import Language.Haskell.TH import Language.Haskell.TH.Syntax import Pipes ((>->)) import qualified Pipes as P -import qualified Pipes.Prelude as P import qualified Pipes.Parse as P +import qualified Pipes.Prelude as P import qualified Pipes.Safe as P import qualified Pipes.Safe.Prelude as Safe -import System.IO (Handle, IOMode(ReadMode, WriteMode), hPrint, stderr) +import System.IO (Handle, IOMode (ReadMode, WriteMode), hPrint, stderr) -- * Parsing @@ -48,31 +58,35 @@ type Separator = T.Text type QuoteChar = Char data QuotingMode - -- | No quoting enabled. 
The separator may not appear in values - = NoQuoting - -- | Quoted values with the given quoting character. Quotes are escaped by doubling them. - -- Mostly RFC4180 compliant, except doesn't support newlines in values - | RFC4180Quoting QuoteChar - deriving (Eq, Show, Lift) - -data ParserOptions = ParserOptions { headerOverride :: Maybe [T.Text] - , columnSeparator :: Separator - , quotingMode :: QuotingMode } - deriving (Eq, Show) + = -- | No quoting enabled. The separator may not appear in values + NoQuoting + | -- | Quoted values with the given quoting character. Quotes are escaped by doubling them. + -- Mostly RFC4180 compliant, except doesn't support newlines in values + RFC4180Quoting QuoteChar + deriving (Eq, Show, Lift) + +data ParserOptions = ParserOptions + { headerOverride :: Maybe [T.Text] + , columnSeparator :: Separator + , quotingMode :: QuotingMode + } + deriving (Eq, Show) instance Lift ParserOptions where - lift (ParserOptions Nothing sep quoting) = [|ParserOptions Nothing $sep' $quoting'|] - where sep' = [|T.pack $(stringE $ T.unpack sep)|] - quoting' = lift quoting - lift (ParserOptions (Just hs) sep quoting) = [|ParserOptions (Just $hs') $sep' $quoting'|] - where sep' = [|T.pack $(stringE $ T.unpack sep)|] - hs' = [|map T.pack $(listE $ map (stringE . T.unpack) hs)|] - quoting' = lift quoting + lift (ParserOptions Nothing sep quoting) = [|ParserOptions Nothing $sep' $quoting'|] + where + sep' = [|T.pack $(stringE $ T.unpack sep)|] + quoting' = lift quoting + lift (ParserOptions (Just hs) sep quoting) = [|ParserOptions (Just $hs') $sep' $quoting'|] + where + sep' = [|T.pack $(stringE $ T.unpack sep)|] + hs' = [|map T.pack $(listE $ map (stringE . T.unpack) hs)|] + quoting' = lift quoting #if MIN_VERSION_template_haskell(2,16,0) #if MIN_VERSION_template_haskell(2,17,0) - liftTyped = liftCode . unsafeTExpCoerce . lift + liftTyped = liftCode . unsafeTExpCoerce . lift #else - liftTyped = unsafeTExpCoerce . lift + liftTyped = unsafeTExpCoerce . 
lift #endif #endif @@ -88,49 +102,51 @@ defaultSep = T.pack "," -- | Helper to split a 'T.Text' on commas and strip leading and -- trailing whitespace from each resulting chunk. tokenizeRow :: ParserOptions -> T.Text -> [T.Text] -tokenizeRow options = - handleQuoting . T.splitOn sep - where sep = columnSeparator options - quoting = quotingMode options - handleQuoting = case quoting of - NoQuoting -> id - RFC4180Quoting quote -> reassembleRFC4180QuotedParts sep quote +tokenizeRow options = handleQuoting . T.splitOn sep + where + sep = columnSeparator options + quoting = quotingMode options + handleQuoting = case quoting of + NoQuoting -> id + RFC4180Quoting quote -> reassembleRFC4180QuotedParts sep quote -- | Post processing applied to a list of tokens split by the -- separator which should have quoted sections reassembled reassembleRFC4180QuotedParts :: Separator -> QuoteChar -> [T.Text] -> [T.Text] reassembleRFC4180QuotedParts sep quoteChar = go - where go [] = [] - go (part:parts) - | T.null part = T.empty : go parts - | prefixQuoted part = + where + go [] = [] + go (part : parts) + | T.null part = T.empty : go parts + | prefixQuoted part = if suffixQuoted part - then unescape (T.drop 1 . T.dropEnd 1 $ part) : go parts - else case break suffixQuoted parts of - (h,[]) -> [unescape (T.intercalate sep (T.drop 1 part : h))] - (h,t:ts) -> unescape - (T.intercalate - sep - (T.drop 1 part : h ++ [T.dropEnd 1 t])) - : go ts - | otherwise = T.strip part : go parts - - prefixQuoted t = - T.head t == quoteChar-- && - -- T.length (T.takeWhile (== quoteChar) t) `rem` 2 == 1 - - suffixQuoted t = - quoteText `T.isSuffixOf` t-- && - -- T.length (T.takeWhileEnd (== quoteChar) t) `rem` 2 == 1 - - quoteText = T.singleton quoteChar - - unescape :: T.Text -> T.Text - unescape = T.replace q2 quoteText - where q2 = quoteText <> quoteText - ---tokenizeRow :: Separator -> T.Text -> [T.Text] ---tokenizeRow sep = map (unquote . T.strip) . T.splitOn sep + then unescape (T.drop 1 . 
T.dropEnd 1 $ part) : go parts + else case break suffixQuoted parts of + (h, []) -> [unescape (T.intercalate sep (T.drop 1 part : h))] + (h, t : ts) -> + unescape + ( T.intercalate + sep + (T.drop 1 part : h ++ [T.dropEnd 1 t]) + ) + : go ts + | otherwise = T.strip part : go parts + + prefixQuoted t = + T.head t == quoteChar -- && + -- T.length (T.takeWhile (== quoteChar) t) `rem` 2 == 1 + suffixQuoted t = + quoteText `T.isSuffixOf` t -- && + -- T.length (T.takeWhileEnd (== quoteChar) t) `rem` 2 == 1 + quoteText = T.singleton quoteChar + + unescape :: T.Text -> T.Text + unescape = T.replace q2 quoteText + where + q2 = quoteText <> quoteText + +-- tokenizeRow :: Separator -> T.Text -> [T.Text] +-- tokenizeRow sep = map (unquote . T.strip) . T.splitOn sep -- where unquote txt -- | quoted txt = case T.dropEnd 1 (T.drop 1 txt) of -- txt' | T.null txt' -> "Col" @@ -145,143 +161,180 @@ reassembleRFC4180QuotedParts sep quoteChar = go -- | Infer column types from a prefix (up to 1000 lines) of a CSV -- file. -prefixInference :: (ColumnTypeable a, Monoid a, Monad m) - => P.Parser [T.Text] m [a] -prefixInference = P.draw >>= \case - Nothing -> return [] - Just row1 -> P.foldAll (\ts -> zipWith (<>) ts . inferCols) - (inferCols row1) - id - where inferCols = map inferType +prefixInference :: + (ColumnTypeable a, Semigroup a, Monad m, Show a) => + P.Parser [T.Text] m [a] +prefixInference = + P.draw >>= \case + Nothing -> return [] + Just row1 -> + P.foldAll + (\ts -> zipWith (<>) ts . inferCols) + (inferCols row1) + id + where + inferCols = map inferType -- | Extract column names and inferred types from a CSV file. 
-readColHeaders :: (ColumnTypeable a, Monoid a, Monad m) - => ParserOptions -> P.Producer [T.Text] m () -> m [(T.Text, a)] +readColHeaders :: + (ColumnTypeable a, Semigroup a, Monad m, Show a) => + ParserOptions + -> P.Producer [T.Text] m () + -> m [(T.Text, a)] readColHeaders opts = P.evalStateT $ - do headerRow <- maybe (fromMaybe err <$> P.draw) - pure - (headerOverride opts) - colTypes <- prefixInference - unless (length headerRow == length colTypes) (error errNumColumns) - return (zip headerRow colTypes) - where err = error "Empty Producer has no header row" - errNumColumns = - unlines - [ "" - , "Error parsing CSV: " - , " Number of columns in header differs from number of columns" - , " found in the remaining file. This may be due to newlines" - , " being present within the data itself (not just separating" - , " rows). If support for embedded newlines is required, " - , " consider using the Frames-dsv package in conjunction with" - , " Frames to make use of a different CSV parser."] + do + headerRow <- + maybe + (fromMaybe err <$> P.draw) + pure + (headerOverride opts) + colTypes <- prefixInference + unless (length headerRow == length colTypes) (error errNumColumns) + return (zip headerRow colTypes) + where + err = error "Empty Producer has no header row" + errNumColumns = + unlines + [ "" + , "Error parsing CSV: " + , " Number of columns in header differs from number of columns" + , " found in the remaining file. This may be due to newlines" + , " being present within the data itself (not just separating" + , " rows). If support for embedded newlines is required, " + , " consider using the Frames-dsv package in conjunction with" + , " Frames to make use of a different CSV parser." + ] -- * Loading CSV Data -- | Parsing each component of a 'RecF' from a list of text chunks, -- one chunk per record component. class ReadRec rs where - readRec :: [T.Text] -> Rec (Either T.Text :. ElField) rs + readRec :: [T.Text] -> Rec (Either T.Text :. 
ElField) rs instance ReadRec '[] where - readRec _ = RNil + readRec _ = RNil instance (Parseable t, ReadRec ts, KnownSymbol s) => ReadRec (s :-> t ': ts) where - readRec [] = Compose (Left mempty) :& readRec [] - readRec (h:t) = maybe (Compose (Left (T.copy h))) - (Compose . Right . Field) - (parse' h) :& readRec t + readRec [] = Compose (Left mempty) :& readRec [] + readRec (h : t) = + maybe + (Compose (Left (T.copy h))) + (Compose . Right . Field) + (parse' h) + :& readRec t -- | Opens a file (in 'P.MonadSafe') and repeatedly applies the given -- function to the 'Handle' to obtain lines to yield. Adapted from the -- moribund pipes-text package. -pipeLines :: P.MonadSafe m - => (Handle -> IO (Either IOException T.Text)) - -> FilePath - -> P.Producer T.Text m () +pipeLines :: + (P.MonadSafe m) => + (Handle -> IO (Either IOException T.Text)) + -> FilePath + -> P.Producer T.Text m () pipeLines pgetLine fp = Safe.withFile fp ReadMode $ \h -> - let loop = do txt <- P.liftIO (pgetLine h) - case txt of - Left _e -> return () - Right y -> P.yield y >> loop - in loop + let loop = do + txt <- P.liftIO (pgetLine h) + case txt of + Left _e -> return () + Right y -> P.yield y >> loop + in loop -- | Produce lines of 'T.Text'. -produceTextLines :: P.MonadSafe m => FilePath -> P.Producer T.Text m () +produceTextLines :: (P.MonadSafe m) => FilePath -> P.Producer T.Text m () produceTextLines = pipeLines (try . T.hGetLine) -- | Produce lines of tokens that were separated by the given -- separator. -produceTokens :: P.MonadSafe m - => FilePath - -> Separator - -> P.Producer [T.Text] m () +produceTokens :: + (P.MonadSafe m) => + FilePath + -> Separator + -> P.Producer [T.Text] m () produceTokens fp sep = produceTextLines fp >-> P.map tokenize - where tokenize = tokenizeRow popts - popts = defaultParser { columnSeparator = sep } + where + tokenize = tokenizeRow popts + popts = defaultParser{columnSeparator = sep} -- | Consume lines of 'T.Text', writing them to a file. 
-consumeTextLines :: P.MonadSafe m => FilePath -> P.Consumer T.Text m r +consumeTextLines :: (P.MonadSafe m) => FilePath -> P.Consumer T.Text m r consumeTextLines fp = Safe.withFile fp WriteMode $ \h -> - let loop = P.await >>= P.liftIO . T.hPutStrLn h >> loop - in loop + let loop = P.await >>= P.liftIO . T.hPutStrLn h >> loop + in loop -- | Produce the lines of a latin1 (or ISO8859 Part 1) encoded file as -- ’T.Text’ values. -readFileLatin1Ln :: P.MonadSafe m => FilePath -> P.Producer [T.Text] m () -readFileLatin1Ln fp = pipeLines (try . fmap T.decodeLatin1 . B8.hGetLine) fp - >-> P.map (tokenizeRow defaultParser) +readFileLatin1Ln :: (P.MonadSafe m) => FilePath -> P.Producer [T.Text] m () +readFileLatin1Ln fp = + pipeLines (try . fmap T.decodeLatin1 . B8.hGetLine) fp + >-> P.map (tokenizeRow defaultParser) -- | Read a 'RecF' from one line of CSV. -readRow :: ReadRec rs - => ParserOptions -> T.Text -> Rec (Either T.Text :. ElField) rs +readRow :: + (ReadRec rs) => + ParserOptions + -> T.Text + -> Rec (Either T.Text :. ElField) rs readRow = (readRec .) . tokenizeRow -- | Produce rows where any given entry can fail to parse. -readTableMaybeOpt :: (P.MonadSafe m, ReadRec rs, RMap rs) - => ParserOptions - -> FilePath - -> P.Producer (Rec (Maybe :. ElField) rs) m () +readTableMaybeOpt :: + (P.MonadSafe m, ReadRec rs, RMap rs) => + ParserOptions + -> FilePath + -> P.Producer (Rec (Maybe :. ElField) rs) m () readTableMaybeOpt opts csvFile = - produceTokens csvFile (columnSeparator opts) >-> pipeTableMaybeOpt opts + produceTokens csvFile (columnSeparator opts) >-> pipeTableMaybeOpt opts -- | Stream lines of CSV data into rows of ’Rec’ values values where -- any given entry can fail to parse. -pipeTableMaybeOpt :: (Monad m, ReadRec rs, RMap rs) - => ParserOptions - -> P.Pipe [T.Text] (Rec (Maybe :. ElField) rs) m () +pipeTableMaybeOpt :: + (Monad m, ReadRec rs, RMap rs) => + ParserOptions + -> P.Pipe [T.Text] (Rec (Maybe :. 
ElField) rs) m () pipeTableMaybeOpt opts = do - when (isNothing (headerOverride opts)) (() <$ P.await) - P.map (rmap (either (const (Compose Nothing)) - (Compose . Just) . getCompose) - . readRec) + when (isNothing (headerOverride opts)) (() <$ P.await) + P.map + ( rmap + ( either + (const (Compose Nothing)) + (Compose . Just) + . getCompose + ) + . readRec + ) -- | Stream lines of CSV data into rows of ’Rec’ values values where -- any given entry can fail to parse. In the case of a parse failure, the -- raw 'T.Text' of that entry is retained. -pipeTableEitherOpt :: (Monad m, ReadRec rs) - => ParserOptions - -> P.Pipe T.Text (Rec (Either T.Text :. ElField) rs) m () +pipeTableEitherOpt :: + (Monad m, ReadRec rs) => + ParserOptions + -> P.Pipe T.Text (Rec (Either T.Text :. ElField) rs) m () pipeTableEitherOpt opts = do - when (isNothing (headerOverride opts)) (() <$ P.await) - P.map (readRow opts) + when (isNothing (headerOverride opts)) (() <$ P.await) + P.map (readRow opts) -- | Produce rows where any given entry can fail to parse. -readTableMaybe :: (P.MonadSafe m, ReadRec rs, RMap rs) - => FilePath -> P.Producer (Rec (Maybe :. ElField) rs) m () +readTableMaybe :: + (P.MonadSafe m, ReadRec rs, RMap rs) => + FilePath + -> P.Producer (Rec (Maybe :. ElField) rs) m () readTableMaybe = readTableMaybeOpt defaultParser -- | Stream lines of CSV data into rows of ’Rec’ values where any -- given entry can fail to parse. -pipeTableMaybe :: (Monad m, ReadRec rs, RMap rs) - => P.Pipe [T.Text] (Rec (Maybe :. ElField) rs) m () +pipeTableMaybe :: + (Monad m, ReadRec rs, RMap rs) => + P.Pipe [T.Text] (Rec (Maybe :. ElField) rs) m () pipeTableMaybe = pipeTableMaybeOpt defaultParser -- | Stream lines of CSV data into rows of ’Rec’ values where any -- given entry can fail to parse. In the case of a parse failure, the -- raw 'T.Text' of that entry is retained. -pipeTableEither :: (Monad m, ReadRec rs) - => P.Pipe T.Text (Rec (Either T.Text :. 
ElField) rs) m () +pipeTableEither :: + (Monad m, ReadRec rs) => + P.Pipe T.Text (Rec (Either T.Text :. ElField) rs) m () pipeTableEither = pipeTableEitherOpt defaultParser -- -- | Returns a `MonadPlus` producer of rows for which each column was @@ -308,86 +361,124 @@ pipeTableEither = pipeTableEitherOpt defaultParser -- | Returns a producer of rows for which each column was successfully -- parsed. -readTableOpt :: (P.MonadSafe m, ReadRec rs, RMap rs) - => ParserOptions -> FilePath -> P.Producer (Record rs) m () +readTableOpt :: + (P.MonadSafe m, ReadRec rs, RMap rs) => + ParserOptions + -> FilePath + -> P.Producer (Record rs) m () readTableOpt opts csvFile = readTableMaybeOpt opts csvFile P.>-> go - where go = P.await >>= maybe go (\x -> P.yield x >> go) . recMaybe + where + go = P.await >>= maybe go (\x -> P.yield x >> go) . recMaybe -- | Pipe lines of CSV text into rows for which each column was -- successfully parsed. -pipeTableOpt :: (ReadRec rs, RMap rs, Monad m) - => ParserOptions -> P.Pipe [T.Text] (Record rs) m () +pipeTableOpt :: + (ReadRec rs, RMap rs, Monad m) => + ParserOptions + -> P.Pipe [T.Text] (Record rs) m () pipeTableOpt opts = pipeTableMaybeOpt opts >-> P.map recMaybe >-> P.concat -- | Returns a producer of rows for which each column was successfully -- parsed. -readTable :: (P.MonadSafe m, ReadRec rs, RMap rs) - => FilePath -> P.Producer (Record rs) m () +readTable :: + (P.MonadSafe m, ReadRec rs, RMap rs) => + FilePath + -> P.Producer (Record rs) m () readTable = readTableOpt defaultParser -readRecEither :: (ReadRec rs, RMap rs) - => [T.Text] -> Either (Rec (Either T.Text :. ElField) rs) (Record rs) -readRecEither tokens = let tmp = readRec tokens - in case rtraverse getCompose tmp of - Right r -> Right r - _ -> Left tmp +readRecEither :: + (ReadRec rs, RMap rs) => + [T.Text] + -> Either (Rec (Either T.Text :. 
ElField) rs) (Record rs) +readRecEither tokens = + let tmp = readRec tokens + in case rtraverse getCompose tmp of + Right r -> Right r + _ -> Left tmp -- | Similar to 'readTable' except that rows that fail to parse are -- printed to @stderr@ with columns that failed to parse printed as -- @"Left rawtext"@ while those that were successfully parsed are -- shown as @"Right text"@. -readTableDebug :: forall m rs. - (P.MonadSafe m, ReadRec rs, RMap rs, - RecMapMethod ShowCSV (Either T.Text :. ElField) rs, - RecordToList rs) - => FilePath -> P.Producer (Record rs) m () +readTableDebug :: + forall m rs. + ( P.MonadSafe m + , ReadRec rs + , RMap rs + , RecMapMethod ShowCSV (Either T.Text :. ElField) rs + , RecordToList rs + ) => + FilePath + -> P.Producer (Record rs) m () readTableDebug csvFile = - produceTokens csvFile (columnSeparator opts) >-> go >-> debugAll - where opts = defaultParser - go = do - when (isNothing (headerOverride opts)) (() <$ P.await) - P.map readRecEither - debugAll = do - P.await >>= either (P.liftIO . hPrint stderr . debugOne) P.yield - debugAll - debugOne = recordToList . rmapMethod @ShowCSV (aux . getCompose) - aux :: (ShowCSV (PayloadType ElField a)) - => Either T.Text (ElField a) -> Const T.Text a - aux (Right (Field x)) = Const ("Right " <> showCSV x) - aux (Left txt) = Const ("Left " <> txt) + produceTokens csvFile (columnSeparator opts) >-> go >-> debugAll + where + opts = defaultParser + go = do + when (isNothing (headerOverride opts)) (() <$ P.await) + P.map readRecEither + debugAll = do + P.await >>= either (P.liftIO . hPrint stderr . debugOne) P.yield + debugAll + debugOne = recordToList . rmapMethod @ShowCSV (aux . getCompose) + aux :: + (ShowCSV (PayloadType ElField a)) => + Either T.Text (ElField a) + -> Const T.Text a + aux (Right (Field x)) = Const ("Right " <> showCSV x) + aux (Left txt) = Const ("Left " <> txt) -- | Pipe lines of CSV text into rows for which each column was -- successfully parsed. 
-pipeTable :: (ReadRec rs, RMap rs, Monad m) - => P.Pipe [T.Text] (Record rs) m () +pipeTable :: + (ReadRec rs, RMap rs, Monad m) => + P.Pipe [T.Text] (Record rs) m () pipeTable = pipeTableOpt defaultParser -- * Writing CSV Data -showFieldsCSV :: (RecMapMethod ShowCSV ElField ts, RecordToList ts) - => Record ts -> [T.Text] +showFieldsCSV :: + (RecMapMethod ShowCSV ElField ts, RecordToList ts) => + Record ts + -> [T.Text] showFieldsCSV = recordToList . rmapMethod @ShowCSV aux - where aux :: (ShowCSV (PayloadType ElField a)) - => ElField a -> Const T.Text a - aux (Field x) = Const (showCSV x) + where + aux :: + (ShowCSV (PayloadType ElField a)) => + ElField a + -> Const T.Text a + aux (Field x) = Const (showCSV x) -- | 'P.yield' a header row with column names followed by a line of -- text for each 'Record' with each field separated by a comma. If -- your source of 'Record' values is a 'P.Producer', consider using -- 'pipeToCSV' to keep everything streaming. -produceCSV :: forall f ts m. - (ColumnHeaders ts, Foldable f, Monad m, RecordToList ts, - RecMapMethod ShowCSV ElField ts) - => f (Record ts) -> P.Producer String m () +produceCSV :: + forall f ts m. + ( ColumnHeaders ts + , Foldable f + , Monad m + , RecordToList ts + , RecMapMethod ShowCSV ElField ts + ) => + f (Record ts) + -> P.Producer String m () produceCSV = produceDSV defaultParser -produceDSV :: forall f ts m. - (ColumnHeaders ts, Foldable f, Monad m, RecordToList ts, - RecMapMethod ShowCSV ElField ts) - => ParserOptions -> f (Record ts) -> P.Producer String m () +produceDSV :: + forall f ts m. + ( ColumnHeaders ts + , Foldable f + , Monad m + , RecordToList ts + , RecMapMethod ShowCSV ElField ts + ) => + ParserOptions + -> f (Record ts) + -> P.Producer String m () produceDSV opts recs = do - P.yield (intercalate (T.unpack separator) (columnHeaders (Proxy :: Proxy (Record ts)))) - F.mapM_ (P.yield . T.unpack . T.intercalate separator . 
showFieldsCSV) recs + P.yield (intercalate (T.unpack separator) (columnHeaders (Proxy :: Proxy (Record ts)))) + F.mapM_ (P.yield . T.unpack . T.intercalate separator . showFieldsCSV) recs where separator = columnSeparator opts @@ -395,26 +486,45 @@ produceDSV opts recs = do -- text for each 'Record' with each field separated by a comma. This -- is the same as 'produceCSV', but adapted for cases where you have -- streaming input that you wish to use to produce streaming output. -pipeToCSV :: forall ts m. - (Monad m, ColumnHeaders ts, RecordToList ts, - RecMapMethod ShowCSV ElField ts) - => P.Pipe (Record ts) T.Text m () +pipeToCSV :: + forall ts m. + ( Monad m + , ColumnHeaders ts + , RecordToList ts + , RecMapMethod ShowCSV ElField ts + ) => + P.Pipe (Record ts) T.Text m () pipeToCSV = P.yield (T.intercalate "," (map T.pack header)) >> go - where header = columnHeaders (Proxy :: Proxy (Record ts)) - go :: P.Pipe (Record ts) T.Text m () - go = P.map (T.intercalate "," . showFieldsCSV) + where + header = columnHeaders (Proxy :: Proxy (Record ts)) + go :: P.Pipe (Record ts) T.Text m () + go = P.map (T.intercalate "," . showFieldsCSV) -- | Write a header row with column names followed by a line of text -- for each 'Record' to the given file. -writeCSV :: (ColumnHeaders ts, Foldable f, RecordToList ts, - RecMapMethod ShowCSV ElField ts) - => FilePath -> f (Record ts) -> IO () -writeCSV = writeDSV defaultParser +writeCSV :: + ( ColumnHeaders ts + , Foldable f + , RecordToList ts + , RecMapMethod ShowCSV ElField ts + ) => + FilePath + -> f (Record ts) + -> IO () +writeCSV = writeDSV defaultParser -- | Write a header row with column names followed by a line of text -- for each 'Record' to the given file. -writeDSV :: (ColumnHeaders ts, Foldable f, RecordToList ts, - RecMapMethod ShowCSV ElField ts) - => ParserOptions -> FilePath -> f (Record ts) -> IO () -writeDSV opts fp recs = P.runSafeT . 
P.runEffect $ - produceDSV opts recs >-> P.map T.pack >-> consumeTextLines fp +writeDSV :: + ( ColumnHeaders ts + , Foldable f + , RecordToList ts + , RecMapMethod ShowCSV ElField ts + ) => + ParserOptions + -> FilePath + -> f (Record ts) + -> IO () +writeDSV opts fp recs = + P.runSafeT . P.runEffect $ + produceDSV opts recs >-> P.map T.pack >-> consumeTextLines fp diff --git a/src/Frames/ColumnTypeable.hs b/src/Frames/ColumnTypeable.hs index 021aa0b..048eeb0 100644 --- a/src/Frames/ColumnTypeable.hs +++ b/src/Frames/ColumnTypeable.hs @@ -1,5 +1,4 @@ -{-# LANGUAGE BangPatterns, DefaultSignatures, LambdaCase, - ScopedTypeVariables #-} +{-# LANGUAGE DefaultSignatures, ScopedTypeVariables #-} module Frames.ColumnTypeable where import Control.Monad (MonadPlus) import Data.Maybe (fromMaybe) diff --git a/src/Frames/ColumnUniverse.hs b/src/Frames/ColumnUniverse.hs index a660ca4..725a606 100644 --- a/src/Frames/ColumnUniverse.hs +++ b/src/Frames/ColumnUniverse.hs @@ -1,161 +1,226 @@ -{-# LANGUAGE BangPatterns, CPP, ConstraintKinds, DataKinds, - FlexibleContexts, FlexibleInstances, GADTs, InstanceSigs, - KindSignatures, LambdaCase, MultiParamTypeClasses, - OverloadedStrings, QuasiQuotes, RankNTypes, - ScopedTypeVariables, TemplateHaskell, TypeApplications, - TypeFamilies, TypeOperators, UndecidableInstances #-} +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE CPP #-} +{-# LANGUAGE ConstraintKinds #-} +{-# LANGUAGE DataKinds #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE GADTs #-} +{-# LANGUAGE InstanceSigs #-} +{-# LANGUAGE KindSignatures #-} +{-# LANGUAGE LambdaCase #-} +{-# LANGUAGE MultiParamTypeClasses #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE QuasiQuotes #-} +{-# LANGUAGE RankNTypes #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE TemplateHaskell #-} +{-# LANGUAGE TypeApplications #-} +{-# LANGUAGE TypeFamilies #-} +{-# LANGUAGE TypeOperators #-} +{-# LANGUAGE UndecidableInstances #-} {-# OPTIONS_GHC 
-fno-warn-orphans #-} + module Frames.ColumnUniverse ( - CoRec, Columns, ColumnUniverse, ColInfo, - CommonColumns, CommonColumnsCat, parsedTypeRep + CoRec, + Columns, + ColumnUniverse, + ColInfo, + CommonColumns, + CommonColumnsCat, + parsedTypeRep, ) where + import Data.Maybe (fromMaybe) #if __GLASGOW_HASKELL__ < 808 import Data.Semigroup (Semigroup((<>))) #endif +import Data.Either (fromRight) import qualified Data.Text as T import Data.Vinyl import Data.Vinyl.CoRec import Data.Vinyl.Functor -import Data.Vinyl.TypeLevel (RIndex, NatToInt) -import Frames.ColumnTypeable +import Data.Vinyl.TypeLevel (NatToInt, RIndex) import Frames.Categorical +import Frames.ColumnTypeable import Language.Haskell.TH -- | Extract a function to test whether some value of a given type -- could be read from some 'T.Text'. -inferParseable :: Parseable a => T.Text -> (Maybe :. Parsed) a +inferParseable :: (Parseable a) => T.Text -> (Maybe :. Parsed) a inferParseable = Compose . parse -- | Helper to call 'inferParseable' on variants of a 'CoRec'. -inferParseable' :: Parseable a => (((->) T.Text) :. (Maybe :. Parsed)) a +inferParseable' :: (Parseable a) => ((->) T.Text :. (Maybe :. Parsed)) a inferParseable' = Compose inferParseable -- * Record Helpers -tryParseAll :: forall ts. (RecApplicative ts, RPureConstrained Parseable ts) - => T.Text -> Rec (Maybe :. Parsed) ts +tryParseAll :: + forall ts. + (RecApplicative ts, RPureConstrained Parseable ts) => + T.Text + -> Rec (Maybe :. Parsed) ts tryParseAll = rtraverse getCompose funs - where funs :: Rec (((->) T.Text) :. (Maybe :. Parsed)) ts - funs = rpureConstrained @Parseable inferParseable' + where + funs :: Rec (((->) T.Text) :. (Maybe :. Parsed)) ts + funs = rpureConstrained @Parseable inferParseable' -- * Column Type Inference -- | Information necessary for synthesizing row types and comparing -- types. 
newtype ColInfo a = ColInfo (Either (String -> Q [Dec]) Type, Parsed a) -instance Show a => Show (ColInfo a) where - show (ColInfo (t,p)) = "(ColInfo {" - ++ either (const "cat") show t - ++ ", " - ++ show (discardConfidence p) ++"})" -parsedToColInfo :: Parseable a => Parsed a -> ColInfo a +instance (Show a) => Show (ColInfo a) where + show (ColInfo (t, p)) = + "(ColInfo {" + ++ either (const "cat") show t + ++ ", " + ++ show (discardConfidence p) + ++ "})" + +parsedToColInfo :: (Parseable a) => Parsed a -> ColInfo a parsedToColInfo x = case getConst rep of - Left dec -> ColInfo (Left dec, x) - Right ty -> - ColInfo (Right ty, x) - where rep = representableAsType x + Left dec -> ColInfo (Left dec, x) + Right ty -> + ColInfo (Right ty, x) + where + rep = representableAsType x parsedTypeRep :: ColInfo a -> Parsed Type -parsedTypeRep (ColInfo (t,p)) = - const (either (const (ConT (mkName "Categorical"))) id t) <$> p +parsedTypeRep (ColInfo (t, p)) = + fromRight (ConT (mkName "Categorical")) t <$ p -- | Map 'Type's we know about (with a special treatment of -- synthesized types for categorical variables) to 'Int's for ordering -- purposes. 
orderParsePriorities :: Parsed (Maybe Type) -> Maybe Int orderParsePriorities x = - case discardConfidence x of - Nothing -> Just 1 -- categorical variable - Just t - | t == tyText -> Just (0 + uncertainty) - | t == tyDbl -> Just (2 + uncertainty) - | t == tyInt -> Just (3 + uncertainty) - | t == tyBool -> Just (4 + uncertainty) - | otherwise -> Nothing - where tyText = ConT (mkName "Text") - tyDbl = ConT (mkName "Double") - tyInt = ConT (mkName "Int") - tyBool = ConT (mkName "Bool") - uncertainty = case x of Definitely _ -> 0; Possibly _ -> 5 + case discardConfidence x of + Nothing -> Just (1 + 6) -- categorical variable + Just t + | t == tyText -> Just (0 + uncertainty) + | t == tyDbl -> Just (2 + uncertainty) + | t == tyInt -> Just (3 + uncertainty) + | t == tyBool -> Just (4 + uncertainty) + | otherwise -> Just (5 + uncertainty) -- Unknown type + where + tyText = ConT (mkName "Text") + tyDbl = ConT (mkName "Double") + tyInt = ConT (mkName "Int") + tyBool = ConT (mkName "Bool") + uncertainty = case x of Definitely _ -> 0; Possibly _ -> 6 -- | We use a join semi-lattice on types for representations. The --- bottom of the lattice is effectively an error (we have nothing to --- represent), @Bool < Int@, @Int < Double@, and @forall n. n <= Text@. +-- bottom of the lattice is effectively an error (we have nothing to +-- represent), @Bool < Int@, @Int < Double@, and @forall n. n <= Text@. -- --- The high-level goal here is that we will pick the "greater" of two --- choices in 'bestRep'. A 'Definitely' parse result is preferred over --- a 'Possibly' parse result. If we have two distinct 'Possibly' parse --- results, we give up. If we have two distinct 'Definitely' parse --- results, we are in dangerous waters: all data is parseable at --- /both/ types, so which do we default to? The defaulting choices --- made here are described in the previous paragraph. If there is no --- defaulting rule, we give up (i.e. use 'T.Text' as a --- representation). 
+-- The high-level goal here is that we will pick the "greater" of two +-- choices in 'bestRep'. A 'Definitely' parse result is preferred over +-- a 'Possibly' parse result. If we have two distinct 'Possibly' parse +-- results, we give up. If we have two distinct 'Definitely' parse +-- results, we are in dangerous waters: all data is parseable at +-- /both/ types, so which do we default to? The defaulting choices +-- made here are described in the previous paragraph. If there is no +-- defaulting rule, we give up (i.e. use 'T.Text' as a +-- representation). lubTypes :: Parsed (Maybe Type) -> Parsed (Maybe Type) -> Maybe Ordering lubTypes x y = compare <$> orderParsePriorities y <*> orderParsePriorities x -instance (T.Text ∈ ts, RPureConstrained Parseable ts) => Monoid (CoRec ColInfo ts) where - mempty = CoRec (ColInfo ( Right (ConT (mkName "Text")), Possibly T.empty)) +-- instance (T.Text ∈ ts, RPureConstrained Parseable ts) => Monoid (CoRec ColInfo ts) where +-- mempty = CoRec (ColInfo (Right (ConT (mkName "Text")), Possibly T.empty)) -- | A helper For the 'Semigroup' instance below. -mergeEqTypeParses :: forall ts. (RPureConstrained Parseable ts, T.Text ∈ ts) - => CoRec ColInfo ts -> CoRec ColInfo ts -> CoRec ColInfo ts -mergeEqTypeParses x@(CoRec _) y = fromMaybe definitelyText - $ coRecTraverse getCompose - (coRecMapC @Parseable aux x) - where definitelyText = CoRec (ColInfo (Right (ConT (mkName "Text")), Definitely T.empty)) - aux :: forall a. (Parseable a, NatToInt (RIndex a ts)) - => ColInfo a -> (Maybe :. ColInfo) a - aux (ColInfo (_, pX)) = - case asA' @a y of +mergeEqTypeParses :: + forall ts. + (RPureConstrained Parseable ts, T.Text ∈ ts) => + CoRec ColInfo ts + -> CoRec ColInfo ts + -> CoRec ColInfo ts +mergeEqTypeParses x@(CoRec _) y = + fromMaybe definitelyText $ + coRecTraverse + getCompose + (coRecMapC @Parseable aux x) + where + definitelyText = CoRec (ColInfo (Right (ConT (mkName "Text")), Definitely T.empty)) + aux :: + forall a. 
+    (Parseable a, NatToInt (RIndex a ts)) =>
+    ColInfo a
+    -> (Maybe :. ColInfo) a
+    aux (ColInfo (_, pX)) =
+        case asA' @a y of
            Nothing -> Compose Nothing
            Just (ColInfo (_, pY)) ->
-          maybe (Compose Nothing)
+                maybe
+                    (Compose Nothing)
+                    (Compose . Just . parsedToColInfo)
+                    (parseCombine pX pY)

-instance (T.Text ∈ ts, RPureConstrained Parseable ts)
-  => Semigroup (CoRec ColInfo ts) where
-  x@(CoRec (ColInfo (tyX, pX))) <> y@(CoRec (ColInfo (tyY, pY))) =
-    case lubTypes (const (either (const Nothing) Just tyX) <$> pX)
-                  (const (either (const Nothing) Just tyY) <$> pY) of
-      Just GT -> x
-      Just LT -> y
-      Just EQ -> mergeEqTypeParses x y
-      Nothing -> mempty
+instance
+    (T.Text ∈ ts, RPureConstrained Parseable ts) =>
+    Semigroup (CoRec ColInfo ts)
+    where
+    (<>) :: (T.Text ∈ ts, RPureConstrained Parseable ts) => CoRec ColInfo ts -> CoRec ColInfo ts -> CoRec ColInfo ts
+    x@(CoRec (ColInfo (tyX, pX))) <> y@(CoRec (ColInfo (tyY, pY))) =
+        case lubTypes
+            (either (const Nothing) Just tyX <$ pX)
+            (either (const Nothing) Just tyY <$ pY) of
+            Just GT -> x
+            Just LT -> y
+            Just EQ -> mergeEqTypeParses x y
+            Nothing -> CoRec (ColInfo (Right (ConT (mkName "Text")), Possibly T.empty)) -- former 'mempty': fall back to Text instead of crashing via 'undefined'

 -- | Find the best (i.e. smallest) 'CoRec' variant to represent a
--- parsed value. For inspection in GHCi after loading this module,
--- consider this example:
+-- parsed value. For inspection in GHCi after loading this module,
+-- consider this example:
 --
--- >>> :set -XTypeApplications
--- >>> :set -XOverloadedStrings
--- >>> import Data.Vinyl.CoRec (foldCoRec)
--- >>> foldCoRec parsedTypeRep (bestRep @CommonColumns "2.3")
--- Definitely Double
-bestRep :: forall ts.
-  (RPureConstrained Parseable ts,
-   FoldRec ts ts,
-   RecApplicative ts, T.Text ∈ ts)
-  => T.Text -> CoRec ColInfo ts
+-- >>> :set -XTypeApplications
+-- >>> :set -XOverloadedStrings
+-- >>> import Data.Vinyl.CoRec (foldCoRec)
+-- >>> foldCoRec parsedTypeRep (bestRep @CommonColumns "2.3")
+-- Definitely Double
+bestRep ::
+  forall ts.
+ ( RPureConstrained Parseable ts + , RPureConstrained (ShowF ColInfo) ts + , FoldRec ts ts + , RecApplicative ts + , T.Text ∈ ts + ) => + T.Text + -> CoRec ColInfo ts bestRep t - | T.null t || t == "NA" = (CoRec (parsedToColInfo (Possibly T.empty))) - | otherwise = coRecMapC @Parseable parsedToColInfo - . fromMaybe (CoRec (Possibly T.empty :: Parsed T.Text)) - . firstField - . (tryParseAll :: T.Text -> Rec (Maybe :. Parsed) ts) - $ t -{-# INLINABLE bestRep #-} - -instance (RPureConstrained Parseable ts, FoldRec ts ts, - RecApplicative ts, T.Text ∈ ts) => - ColumnTypeable (CoRec ColInfo ts) where - colType (CoRec (ColInfo (t, _))) = t - {-# INLINE colType #-} - inferType = bestRep - {-# INLINABLE inferType #-} + -- \| trace (show (aux t)) False = undefined + | T.null t || t == "NA" = CoRec (parsedToColInfo (Possibly T.empty)) + | otherwise = + coRecMapC @Parseable parsedToColInfo + . fromMaybe (CoRec (Possibly T.empty :: Parsed T.Text)) + . firstField + . (tryParseAll :: T.Text -> Rec (Maybe :. Parsed) ts) + $ t +-- where +-- aux = +-- coRecMapC @Parseable parsedToColInfo +-- . fromMaybe (CoRec (Possibly T.empty :: Parsed T.Text)) +-- . firstField +-- . (tryParseAll :: T.Text -> Rec (Maybe :. Parsed) ts) +{-# INLINEABLE bestRep #-} + +instance + ( RPureConstrained Parseable ts + , FoldRec ts ts + , RPureConstrained (ShowF ColInfo) ts + , RecApplicative ts + , T.Text ∈ ts + ) => + ColumnTypeable (CoRec ColInfo ts) + where + colType (CoRec (ColInfo (t, _))) = t + {-# INLINE colType #-} + inferType = bestRep + {-# INLINEABLE inferType #-} #if !MIN_VERSION_vinyl(0,11,0) instance forall ts. (RPureConstrained Show ts, RecApplicative ts) @@ -175,7 +240,7 @@ type CommonColumnsCat = [Bool, Int, Double, Categorical 8, T.Text] type ColumnUniverse = CoRec ColInfo -- | A universe of common column variants. These are the default --- column types that @Frames@ can infer. See the --- for an example of --- extending the default types with your own. 
+-- column types that @Frames@ can infer. See the +-- for an example of +-- extending the default types with your own. type Columns = ColumnUniverse CommonColumns From 25f16d3774f78de4cd2e919760890505152168c8 Mon Sep 17 00:00:00 2001 From: Anthony Cowley Date: Sat, 21 Oct 2023 23:25:55 -0400 Subject: [PATCH 5/9] Use CoRec as a Semigroup rather than a Monoid --- src/Frames/TH.hs | 13 ++++++++----- test/PrettyTH.hs | 1 + 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/src/Frames/TH.hs b/src/Frames/TH.hs index 618e195..e6d178b 100644 --- a/src/Frames/TH.hs +++ b/src/Frames/TH.hs @@ -2,6 +2,7 @@ QuasiQuotes, RecordWildCards, RoleAnnotations, ScopedTypeVariables, TemplateHaskell, TupleSections, TypeApplications, TypeOperators #-} +{-# LANGUAGE FlexibleContexts #-} -- | Code generation of types relevant to Frames use-cases. Generation -- may be driven by an automated inference process or manual use of -- the individual helpers. @@ -25,6 +26,7 @@ import Language.Haskell.TH.Syntax import qualified Pipes as P import qualified Pipes.Prelude as P import qualified Pipes.Safe as P +import Data.Vinyl.CoRec (ShowF) -- | Generate a column type. recDec :: [Type] -> Type @@ -70,7 +72,7 @@ lowerHead = fmap aux . T.uncons -- | For each column, we declare a type synonym for its type, and a -- Proxy value of that type. colDec :: T.Text -> String -> T.Text - -> (Either (String -> Q [Dec]) Type) + -> Either (String -> Q [Dec]) Type -> Q (Type, [Dec]) colDec prefix rowName colName colTypeGen = do (colTy, extraDecs) <- either colDecsHelper (pure . (,[])) colTypeGen @@ -139,8 +141,8 @@ data RowGen (a :: [GHC.Type]) = -- -- | Shorthand for a 'Proxy' value of 'ColumnUniverse' applied to the -- -- given type list. --- colQ :: Name -> Q Exp --- colQ n = [e| (Proxy :: Proxy (ColumnUniverse $(conT n))) |] +colQ :: Name -> Q Exp +colQ n = [e| (Proxy :: Proxy (ColumnUniverse $(conT n))) |] -- | A default 'RowGen'. 
This instructs the type inference engine to -- get column names from the data file, use the default column @@ -194,7 +196,7 @@ colNamesP src = either (const []) fst <$> P.next src -- | Generate a type for a row of a table all of whose columns remain -- unparsed 'Text' values. tableTypesText' :: forall a c. - (c ~ CoRec ColInfo a, ColumnTypeable c, Monoid c) + (c ~ CoRec ColInfo a, ColumnTypeable c, Semigroup c) => RowGen a -> DecsQ tableTypesText' RowGen {..} = do colNames <- runIO . P.runSafeT $ @@ -228,7 +230,7 @@ tableTypesText' RowGen {..} = -- the CSV file has column names \"foo\", \"bar\", and \"baz\", then -- this will declare @type Foo = "foo" :-> Int@, for example, @foo = -- rlens \@Foo@, and @foo' = rlens' \@Foo@. -tableTypes' :: forall a c. (c ~ CoRec ColInfo a, ColumnTypeable c, Monoid c) +tableTypes' :: forall a c. (c ~ CoRec ColInfo a, ColumnTypeable c, Semigroup c, RPureConstrained (ShowF ColInfo) a) => RowGen a -> DecsQ tableTypes' (RowGen {..}) = do headers <- runIO . P.runSafeT @@ -256,3 +258,4 @@ tableTypes' (RowGen {..}) = case mColNm of Just n -> pure (ConT n, []) -- Column's type was already defined Nothing -> colDec (T.pack tablePrefix) rowTypeName colNm colTy + diff --git a/test/PrettyTH.hs b/test/PrettyTH.hs index 56c21c6..2edad7e 100644 --- a/test/PrettyTH.hs +++ b/test/PrettyTH.hs @@ -36,6 +36,7 @@ makePretty = -- Add new lines before type synonym definitions <* " (RIndex " <* some (psym (/= ')')) <* ")")) -- Unqualify names . replace' "Frames.CSV.ParserOptions" "ParserOptions" + . replace' "GHC.Maybe." "" . replace' "GHC.Base." "" . replace' "GHC.Types." "" . replace' "Data.Vinyl.Core." 
"" From fde6016fe814f97e670c55e4efcf5f5554a587a0 Mon Sep 17 00:00:00 2001 From: Anthony Cowley Date: Sat, 21 Oct 2023 23:26:14 -0400 Subject: [PATCH 6/9] Update flake --- flake.lock | 6 +++--- flake.nix | 11 ++++++++++- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/flake.lock b/flake.lock index dfe4004..70732e0 100644 --- a/flake.lock +++ b/flake.lock @@ -20,11 +20,11 @@ }, "nixpkgs": { "locked": { - "lastModified": 1682779028, - "narHash": "sha256-tFfSbwSLobpHRznAa35KEU3R+fsFWTlmpFhTUdXq8RE=", + "lastModified": 1697793076, + "narHash": "sha256-02e7sCuqLtkyRgrZmdOyvAcQTQdcXj+vpyp9bca6cY4=", "owner": "nixos", "repo": "nixpkgs", - "rev": "54abe781c482f51ff4ff534ebaba77db5bd97442", + "rev": "038b2922be3fc096e1d456f93f7d0f4090628729", "type": "github" }, "original": { diff --git a/flake.nix b/flake.nix index c67a178..fd37264 100644 --- a/flake.nix +++ b/flake.nix @@ -16,8 +16,17 @@ haskell = pkgs.haskellPackages; haskell-overlay = final: prev: let overrideSrc = pkgs.haskell.lib.overrideSrc; + appendConfigureFlags = pkgs.haskell.lib.appendConfigureFlags; + overrideCabal = pkgs.haskell.lib.overrideCabal; + enableCabalFlag = pkgs.haskell.lib.enableCabalFlag; in { - ${pkg-name} = hspkgs.callCabal2nix pkg-name ./. {}; + ${pkg-name} = + hspkgs.callCabal2nixWithOptions pkg-name ./. "-fdemos" {}; + # enableCabalFlag (hspkgs.callCabal2nix pkg-name ./. {}) "demos"; + # (hspkgs.callCabal2nix pkg-name ./. 
{}).overrideAttrs(old: { + # configureFlags = (old.configureFlags or []) ++ ["-fdemos"]; + # }); + # Add here any package overrides you may need }; hspkgs = haskell.override { From 2aa4c447bff0e75b9057270982e860270a7f9125 Mon Sep 17 00:00:00 2001 From: Anthony Cowley Date: Sat, 21 Oct 2023 23:26:20 -0400 Subject: [PATCH 7/9] CI change compiler matrix --- .github/workflows/ci.yml | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 402a4d0..596a9fe 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -18,10 +18,9 @@ jobs: os: [ubuntu-latest] cabal: ["3.6"] ghc: - - "8.8.4" - - "8.10.7" - - "9.0.1" - - "9.2.1" + - "9.0.2" + - "9.2.8" + - "9.4.6" steps: - uses: actions/checkout@v2 From 64c2652f344baa4783e602e051c221b70f7a1439 Mon Sep 17 00:00:00 2001 From: Anthony Cowley Date: Sun, 22 Oct 2023 00:50:58 -0400 Subject: [PATCH 8/9] Add fourmolu configuration --- fourmolu.yaml | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 fourmolu.yaml diff --git a/fourmolu.yaml b/fourmolu.yaml new file mode 100644 index 0000000..c25b243 --- /dev/null +++ b/fourmolu.yaml @@ -0,0 +1,15 @@ +indentation: 4 +function-arrows: leading-args +comma-style: leading +import-export-style: diff-friendly +indent-wheres: false +record-brace-space: false +newlines-between-decls: 1 +haddock-style: single-line +haddock-style-module: single-line +single-constraint-parens: never +let-style: auto +in-style: right-align +respectful: true +fixities: [] +unicode: never From 86da759260ea7494946ce9c86955e8057c64e314 Mon Sep 17 00:00:00 2001 From: Anthony Cowley Date: Sun, 22 Oct 2023 00:56:45 -0400 Subject: [PATCH 9/9] CI update stack config --- .github/workflows/ci.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 596a9fe..68f2d27 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -58,9 +58,8 
@@ jobs: runs-on: ubuntu-latest strategy: matrix: - # stack: ["2.3.1"] stack: ["latest"] - ghc: ["8.10.7"] + ghc: ["9.4.7"] steps: - uses: actions/checkout@v2