From a9a1a674c236aba91139cadf1ee046366287ebc9 Mon Sep 17 00:00:00 2001 From: John Erik Halse Date: Thu, 30 Sep 2021 11:45:56 +0200 Subject: [PATCH 1/3] Updated gowarc dependency --- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index 6549f37..79124f0 100644 --- a/go.mod +++ b/go.mod @@ -5,7 +5,7 @@ go 1.15 require ( github.com/HdrHistogram/hdrhistogram-go v1.1.0 // indirect github.com/coreos/etcd v3.3.13+incompatible - github.com/nlnwa/gowarc v1.0.0-alpha.12 + github.com/nlnwa/gowarc v1.0.0-alpha.13 github.com/nlnwa/veidemann-api/go v0.0.0-20210414094839-b36ce92632fe github.com/opentracing-contrib/go-grpc v0.0.0-20210225150812-73cb765af46e github.com/opentracing/opentracing-go v1.2.0 diff --git a/go.sum b/go.sum index a002d7b..bdaae0a 100644 --- a/go.sum +++ b/go.sum @@ -278,8 +278,8 @@ github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3Rllmb github.com/mwitkow/go-conntrack v0.0.0-20161129095857-cc309e4a2223/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U= github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U= github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno= -github.com/nlnwa/gowarc v1.0.0-alpha.12 h1:1xKWGVr+jPqe5g9Sqgg8qScLE/7+o3NZ/IS2vZMYYKg= -github.com/nlnwa/gowarc v1.0.0-alpha.12/go.mod h1:SUvT0iKudUMshYNnv9zGPW5MmxnInlr3ZshFj+UxaTI= +github.com/nlnwa/gowarc v1.0.0-alpha.13 h1:Y6eZPLaFKvs0QS7YRXjCxq1e+JNDgTzYHWgmyYo6uBM= +github.com/nlnwa/gowarc v1.0.0-alpha.13/go.mod h1:SUvT0iKudUMshYNnv9zGPW5MmxnInlr3ZshFj+UxaTI= github.com/nlnwa/veidemann-api/go v0.0.0-20210414094839-b36ce92632fe h1:yaxQ13HIpCE+I1ZvcVhM1g+sUAopKAAxtt0k1NBmo2Q= github.com/nlnwa/veidemann-api/go v0.0.0-20210414094839-b36ce92632fe/go.mod h1:UVGCJSmHATdV3Eohyq03lF3z86q9nRXRQNv3krrEC8I= github.com/nlnwa/whatwg-url v0.0.0-20200306110950-d1a95e2e8fc3 
h1:iarpnapq+Q98GFlSqalcQq9Qc0f5p7hMO/r+65aitOs= From dcebe87f0f7ab668585575f82de2fd31b2135933 Mon Sep 17 00:00:00 2001 From: John Erik Halse Date: Thu, 30 Sep 2021 11:47:00 +0200 Subject: [PATCH 2/3] Added option to set whether WARC file should be flushed to disk after writing each record --- main.go | 1 + server/warcwriter.go | 1 + settings/mock.go | 9 ++++++++- settings/settings.go | 5 +++++ 4 files changed, 15 insertions(+), 1 deletion(-) diff --git a/main.go b/main.go index 23b52c4..9163119 100644 --- a/main.go +++ b/main.go @@ -25,6 +25,7 @@ func main() { pflag.String("host-name", "", "") pflag.String("warc-dir", "", "") pflag.Int("warc-writer-pool-size", 1, "") + pflag.Bool("flush-record", false, "if true, flush WARC-file to disk after each record.") pflag.String("work-dir", "", "") pflag.Int("termination-grace-period-seconds", 0, "") diff --git a/server/warcwriter.go b/server/warcwriter.go index dcb6d3c..1310059 100644 --- a/server/warcwriter.go +++ b/server/warcwriter.go @@ -209,6 +209,7 @@ func (ww *warcWriter) initFileWriter() { gowarc.WithWarcInfoFunc(ww.warcInfoGenerator), gowarc.WithMaxConcurrentWriters(ww.settings.WarcWriterPoolSize()), gowarc.WithAddWarcConcurrentToHeader(true), + gowarc.WithFlush(ww.settings.FlushRecord()), } ww.fileWriter = gowarc.NewWarcFileWriter(opts...) 
diff --git a/settings/mock.go b/settings/mock.go index 9a6ad6b..4bc0744 100644 --- a/settings/mock.go +++ b/settings/mock.go @@ -16,7 +16,9 @@ package settings -import "github.com/nlnwa/gowarc" +import ( + "github.com/nlnwa/gowarc" +) type Mock struct { hostName string @@ -24,6 +26,7 @@ type Mock struct { warcWriterPoolSize int workDir string terminationGracePeriodSeconds int + flushRecord bool } func NewMock(warcDir string, warcWriterPoolSize int) *Mock { @@ -53,3 +56,7 @@ func (m Mock) TerminationGracePeriodSeconds() int { func (m Mock) WarcVersion() *gowarc.WarcVersion { return gowarc.V1_1 } + +func (m Mock) FlushRecord() bool { + return m.flushRecord +} diff --git a/settings/settings.go b/settings/settings.go index aa6b344..6237e77 100644 --- a/settings/settings.go +++ b/settings/settings.go @@ -28,6 +28,7 @@ type Settings interface { WorkDir() string TerminationGracePeriodSeconds() int WarcVersion() *gowarc.WarcVersion + FlushRecord() bool } type ViperSettings struct{} @@ -55,3 +56,7 @@ func (s ViperSettings) TerminationGracePeriodSeconds() int { func (s ViperSettings) WarcVersion() *gowarc.WarcVersion { return gowarc.V1_1 } + +func (s ViperSettings) FlushRecord() bool { + return viper.GetBool("flush-record") +} From b89378597f7b69d6acfcfeab82468913345d1b83 Mon Sep 17 00:00:00 2001 From: John Erik Halse Date: Thu, 30 Sep 2021 11:52:15 +0200 Subject: [PATCH 3/3] Added option to set WARC version to use for generated records --- main.go | 1 + settings/settings.go | 10 +++++++++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/main.go b/main.go index 9163119..ca992fd 100644 --- a/main.go +++ b/main.go @@ -24,6 +24,7 @@ func main() { pflag.Int("port", 8080, "port the browser controller api listens to.") pflag.String("host-name", "", "") pflag.String("warc-dir", "", "") + pflag.String("warc-version", "1.1", "which WARC version to use for generated records. 
Allowed values: 1.0, 1.1") pflag.Int("warc-writer-pool-size", 1, "") pflag.Bool("flush-record", false, "if true, flush WARC-file to disk after each record.") pflag.String("work-dir", "", "") diff --git a/settings/settings.go b/settings/settings.go index 6237e77..848c166 100644 --- a/settings/settings.go +++ b/settings/settings.go @@ -54,7 +54,15 @@ func (s ViperSettings) TerminationGracePeriodSeconds() int { } func (s ViperSettings) WarcVersion() *gowarc.WarcVersion { - return gowarc.V1_1 + v := viper.GetString("warc-version") + switch v { + case "1.0": + return gowarc.V1_0 + case "1.1": + return gowarc.V1_1 + default: + panic("Unsupported WARC version: " + v) + } } func (s ViperSettings) FlushRecord() bool {