From f6cbcaf62dacc1f65ec097f08613dd10e7a43895 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Rodrigo?= Date: Mon, 3 Jun 2019 19:57:38 +0200 Subject: [PATCH] Support gzip source in Analyser_Merge --- analysers/Analyser_Merge.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/analysers/Analyser_Merge.py b/analysers/Analyser_Merge.py index cd555c0ac..793e05e87 100644 --- a/analysers/Analyser_Merge.py +++ b/analysers/Analyser_Merge.py @@ -22,6 +22,7 @@ import io import bz2 +import gzip from backports import csv # In python3 only just "import csv" import hashlib import inspect @@ -278,7 +279,7 @@ """ class Source: - def __init__(self, attribution = None, millesime = None, url = None, name = None, encoding = "utf-8", file = None, fileUrl = None, fileUrlCache = 30, zip = None, filter = None, logger = None): + def __init__(self, attribution = None, millesime = None, encoding = "utf-8", file = None, fileUrl = None, fileUrlCache = 30, zip = None, gzip = False, filter = None, logger = None): """ Describe the source file. 
@param encoding: file charset encoding @@ -286,6 +287,7 @@ def __init__(self, attribution = None, millesime = None, url = None, name = None @param urlFile: remote URL of source file @param fileUrlCache: days for file in cache @param zip: extract file from zip + @param gzip: if true, decompress the downloaded file as gzip @param filter: lambda expression applied on text file before loading @param logger: a logger """ @@ -296,6 +298,7 @@ def __init__(self, attribution = None, millesime = None, url = None, name = None self.fileUrl = fileUrl self.fileUrlCache = fileUrlCache self.zip = zip + self.gzip = gzip self.filter = filter self.logger = logger @@ -333,6 +336,10 @@ def open(self): z = zipfile.ZipFile(f, 'r').open(self.zip) f = io.BytesIO(z.read()) f.seek(0) + elif self.gzip: + d = gzip.open(downloader.path(self.fileUrl, self.fileUrlCache), mode='r') + f = io.BytesIO(d.read()) + f.seek(0) f = io.StringIO(f.read().decode(self.encoding, 'ignore')) f.seek(0) if self.filter: