Skip to content

Commit

Permalink
add bz2 decompression support
Browse files: browse the repository at this point in the history
  • Loading branch information
fdc-am committed Mar 13, 2016
1 parent 6793439 commit e147f08
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 2 deletions.
11 changes: 9 additions & 2 deletions src/Control/Distributed/Task/DataAccess/HdfsDataSource.hs
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,11 @@ import qualified Data.ByteString as B
import qualified Data.ByteString.Char8 as BC
import qualified Data.ByteString.Lazy as BL
import qualified Data.ByteString.Lazy.Char8 as BLC
import qualified Codec.Compression.BZip as BZip
import qualified Codec.Compression.GZip as GZip
import qualified Data.Hadoop.Configuration as HDFS
import qualified Data.Hadoop.Types as HDFS
import Data.List (isSuffixOf)
import Data.List.Split (splitOn)
import qualified Data.Text as T
import Network.Hadoop.Hdfs
import Network.Hadoop.Read
Expand All @@ -32,7 +33,13 @@ loadEntries hdfsLocation = do
where
targetDescription = show hdfsLocation
doLoad = readHdfsFile hdfsLocation
unzipIfNecessary = if ".gz" `isSuffixOf` (snd hdfsLocation) then GZip.decompress else id
-- Pick a decompressor from the file extension of the path (the second
-- component of hdfsLocation). The extension is the text after the last '.'.
-- Matching on the reversed parts with at least two components means a path
-- that contains no '.' at all (e.g. a file literally named "gz") is passed
-- through unchanged, and avoids calling the partial function 'last'.
unzipIfNecessary =
  case reverse (splitOn "." (snd hdfsLocation)) of
    ("gz" : _ : _) -> GZip.decompress
    ("bz2" : _ : _) -> BZip.decompress
    -- unknown or missing extension: leave the data untouched
    _ -> id

readHdfsFile :: HdfsLocation -> IO BL.ByteString
readHdfsFile (hdfsConfig, path) = do
Expand Down
2 changes: 2 additions & 0 deletions task-distribution.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ library
, time
, hashable
, zlib
, bzlib
, hadoop-rpc
, vector
, containers
Expand Down Expand Up @@ -124,6 +125,7 @@ executable task-distribution-object-code-remote
, vector
, text
, zlib
, bzlib
default-language: Haskell2010

executable run-demo-task
Expand Down

0 comments on commit e147f08

Please sign in to comment.