Skip to content

Commit

Permalink
Support gzip source in Analyser_Merge
Browse files Browse the repository at this point in the history
  • Loading branch information
frodrigo committed Jun 3, 2019
1 parent 578aea0 commit f6cbcaf
Showing 1 changed file with 8 additions and 1 deletion.
9 changes: 8 additions & 1 deletion analysers/Analyser_Merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@

import io
import bz2
import gzip
from backports import csv # In python3 only just "import csv"
import hashlib
import inspect
Expand Down Expand Up @@ -278,14 +279,15 @@
"""

class Source:
def __init__(self, attribution = None, millesime = None, url = None, name = None, encoding = "utf-8", file = None, fileUrl = None, fileUrlCache = 30, zip = None, filter = None, logger = None):
def __init__(self, attribution = None, millesime = None, encoding = "utf-8", file = None, fileUrl = None, fileUrlCache = 30, zip = None, gzip = False, filter = None, logger = None):
"""
Describe the source file.
@param encoding: file charset encoding
@param file: file name in storage
@param fileUrl: remote URL of source file
@param fileUrlCache: days for file in cache
@param zip: extract file from zip
@param gzip: uncompress as gzip
@param filter: lambda expression applied on text file before loading
@param logger: a logger
"""
Expand All @@ -296,6 +298,7 @@ def __init__(self, attribution = None, millesime = None, url = None, name = None
self.fileUrl = fileUrl
self.fileUrlCache = fileUrlCache
self.zip = zip
self.gzip = gzip
self.filter = filter
self.logger = logger

Expand Down Expand Up @@ -333,6 +336,10 @@ def open(self):
z = zipfile.ZipFile(f, 'r').open(self.zip)
f = io.BytesIO(z.read())
f.seek(0)
elif self.gzip:
d = gzip.open(downloader.path(self.fileUrl, self.fileUrlCache), mode='r')
f = io.BytesIO(d.read())
f.seek(0)
f = io.StringIO(f.read().decode(self.encoding, 'ignore'))
f.seek(0)
if self.filter:
Expand Down

0 comments on commit f6cbcaf

Please sign in to comment.