diff --git a/docs/regen_exports.md b/docs/regen_exports.md new file mode 100644 index 000000000..d2176fccc --- /dev/null +++ b/docs/regen_exports.md @@ -0,0 +1,10 @@ +# Regenerating exports + +Should you need to regenerate exports, follow these steps: + +1) Pause the scheduler for the `/do-work` invoker of the export job. +2) Delete the exportfiles for the files you want to regenerate. +3) Mark the batches as `'OPEN'` for the batches you want to regenerate. +4) Increment the value of the `REPROCESS_COUNT` environment variable on the export service. +5) Restart all instances of the export service. +6) Unpause the `/do-work` invoker. \ No newline at end of file diff --git a/internal/export/config.go b/internal/export/config.go index 79d345c5f..33df61a8d 100644 --- a/internal/export/config.go +++ b/internal/export/config.go @@ -52,6 +52,13 @@ type Config struct { TruncateWindow time.Duration `env:"TRUNCATE_WINDOW, default=1h"` MinWindowAge time.Duration `env:"MIN_WINDOW_AGE, default=2h"` TTL time.Duration `env:"CLEANUP_TTL, default=336h"` + // ReprocessCount needs to be incremented by one every time you go back and + // regenerate previously exported files. + ReprocessCount uint `env:"REPROCESS_COUNT, default=0"` +} + +func (c *Config) RepressGeneration() int64 { + return int64(c.ReprocessCount) } func (c *Config) BlobstoreConfig() *storage.Config { diff --git a/internal/export/exportfile.go b/internal/export/exportfile.go index 5a5bf39d7..18f2c341e 100644 --- a/internal/export/exportfile.go +++ b/internal/export/exportfile.go @@ -20,7 +20,6 @@ import ( "crypto" "crypto/rand" "crypto/sha256" - "encoding/hex" "fmt" "io/ioutil" "sort" @@ -52,18 +51,18 @@ type Signer struct { Signer crypto.Signer } -// MarshalExportFile converts the inputs into an encoded byte array and sha256 (base64) of the marshaled protobuf contents. 
-func MarshalExportFile(eb *model.ExportBatch, exposures, revisedExposures []*publishmodel.Exposure, batchNum, batchSize int, signers []*Signer) ([]byte, string, error) { +// MarshalExportFile converts the inputs into an encoded byte array. +func MarshalExportFile(eb *model.ExportBatch, exposures, revisedExposures []*publishmodel.Exposure, batchNum, batchSize int, signers []*Signer) ([]byte, error) { // create main exposure key export binary expContents, err := marshalContents(eb, exposures, revisedExposures, int32(batchNum), int32(batchSize), signers) if err != nil { - return nil, "", fmt.Errorf("unable to marshal exposure keys: %w", err) + return nil, fmt.Errorf("unable to marshal exposure keys: %w", err) } // create signature file - all exports are generated w/ batchNum: 1 batchSize: 1 - have signature match sigContents, err := marshalSignature(expContents, int32(1), int32(1), signers) if err != nil { - return nil, "", fmt.Errorf("unable to marshal signature file: %w", err) + return nil, fmt.Errorf("unable to marshal signature file: %w", err) } // create compressed archive of binary and signature @@ -71,27 +70,25 @@ func MarshalExportFile(eb *model.ExportBatch, exposures, revisedExposures []*pub zw := zip.NewWriter(buf) zf, err := zw.Create(exportBinaryName) if err != nil { - return nil, "", fmt.Errorf("unable to create zip entry for export: %w", err) + return nil, fmt.Errorf("unable to create zip entry for export: %w", err) } _, err = zf.Write(expContents) if err != nil { - return nil, "", fmt.Errorf("unable to write export to archive: %w", err) + return nil, fmt.Errorf("unable to write export to archive: %w", err) } zf, err = zw.Create(exportSignatureName) if err != nil { - return nil, "", fmt.Errorf("unable to create zip entry for signature: %w", err) + return nil, fmt.Errorf("unable to create zip entry for signature: %w", err) } _, err = zf.Write(sigContents) if err != nil { - return nil, "", fmt.Errorf("unable to write signature to archive: %w", err) + 
return nil, fmt.Errorf("unable to write signature to archive: %w", err) } if err := zw.Close(); err != nil { - return nil, "", fmt.Errorf("unable to close archive: %w", err) + return nil, fmt.Errorf("unable to close archive: %w", err) } - digest := sha256.Sum256(expContents) - pbSHA := hex.EncodeToString(digest[:]) - return buf.Bytes(), pbSHA, nil + return buf.Bytes(), nil } // UnmarshalExportFile extracts the protobuf encoded exposure key present in the zip archived payload. diff --git a/internal/export/exportfile_test.go b/internal/export/exportfile_test.go index e4cbd30c9..9cfdd621f 100644 --- a/internal/export/exportfile_test.go +++ b/internal/export/exportfile_test.go @@ -95,7 +95,7 @@ func TestMarshalUnmarshalExportFile(t *testing.T) { signer := &customTestSigner{} - blob, pbHexSHA, err := MarshalExportFile(batch, exposures, revisedExposures, 1, 1, []*Signer{ + blob, err := MarshalExportFile(batch, exposures, revisedExposures, 1, 1, []*Signer{ {SignatureInfo: signatureInfo, Signer: signer}, }) if err != nil { @@ -112,11 +112,6 @@ func TestMarshalUnmarshalExportFile(t *testing.T) { t.Errorf("wrong message digest want: %v, got: %v", wantDigest, b64digest) } - wantHexSHA := "8cdf96f439ea7d7c79869fba2da23c26e8a5b055a88b217c0c4ffbdce1993093" - if wantHexSHA != pbHexSHA { - t.Errorf("want PB hex sha to be: %v, got: %v", wantHexSHA, pbHexSHA) - } - infos := []*export.SignatureInfo{ { VerificationKeyVersion: proto.String("1"), diff --git a/internal/export/worker.go b/internal/export/worker.go index 945a75bec..8062c50b6 100644 --- a/internal/export/worker.go +++ b/internal/export/worker.go @@ -359,12 +359,12 @@ func (s *Server) createFile(ctx context.Context, cfi createFileInfo) (string, er } // Generate exposure key export file. 
- data, pbHexSHA, err := MarshalExportFile(cfi.exportBatch, cfi.exposures, cfi.revisedExposures, cfi.batchNum, cfi.batchSize, signers) + data, err := MarshalExportFile(cfi.exportBatch, cfi.exposures, cfi.revisedExposures, cfi.batchNum, cfi.batchSize, signers) if err != nil { return "", fmt.Errorf("marshaling export file: %w", err) } - objectName := exportFilename(cfi.exportBatch, cfi.batchNum, pbHexSHA) + objectName := exportFilename(cfi.exportBatch, cfi.batchNum, s.config.RepressGeneration()) logger.Infof("Created file %v, signed with %v keys", objectName, len(signers)) ctx, cancel := context.WithTimeout(ctx, blobOperationTimeout) defer cancel() @@ -452,28 +452,10 @@ func (s *Server) createIndex(ctx context.Context, eb *model.ExportBatch, newObje // The batchNum is still needed in the filename to preserve a stable filename sort // order when generating the index file. -func exportFilename(eb *model.ExportBatch, batchNum int, pbHexSHA string) string { - first6 := pbHexSHA - if len(pbHexSHA) >= 6 { - first6 = pbHexSHA[0:6] - } - - // Convert the sha to it's 3-digit ASCII equivalent. This is required because - // some app developers hard-coded a regular expression which assumes only - // digits in filenames. - first6 = toASCIISortable(first6) - - return fmt.Sprintf("%s/%d-%d-%05d999999999%s%s", eb.FilenameRoot, eb.StartTimestamp.Unix(), eb.EndTimestamp.Unix(), batchNum, first6, filenameSuffix) -} - -// toASCIISortable converts each character in the provided string to its -// ASCII-digit-only equivalent string (of numbers), padded to 3 digits. 
-func toASCIISortable(s string) string { - var result string - for _, r := range s { - result = fmt.Sprintf("%s%03d", result, int(r)) - } - return result +func exportFilename(eb *model.ExportBatch, batchNum int, regenCount int64) string { + sTime := eb.StartTimestamp.Unix() + regenCount + eTime := eb.EndTimestamp.Unix() + regenCount + return fmt.Sprintf("%s/%d-%d-%05d%s", eb.FilenameRoot, sTime, eTime, batchNum, filenameSuffix) } func exportIndexFilename(eb *model.ExportBatch) string { diff --git a/internal/export/worker_test.go b/internal/export/worker_test.go index 6d9614dbb..82bf65e8a 100644 --- a/internal/export/worker_test.go +++ b/internal/export/worker_test.go @@ -443,75 +443,54 @@ func TestExportFilename(t *testing.T) { t.Parallel() cases := []struct { - m *model.ExportBatch - num int - sha string - exp string + name string + m *model.ExportBatch + num int + regenCount int64 + exp string }{ { + name: "no_regn", m: &model.ExportBatch{ FilenameRoot: "v1", StartTimestamp: time.Unix(0, 0), EndTimestamp: time.Unix(0, 0), }, - num: 1, - sha: "abc123", - exp: "v1/0-0-00001999999999097098099049050051.zip", + num: 1, + regenCount: 0, + exp: "v1/0-0-00001.zip", }, { + name: "regen_2", m: &model.ExportBatch{ FilenameRoot: "v1", StartTimestamp: time.Unix(0, 0), EndTimestamp: time.Unix(0, 0), }, - num: 2, - sha: "", - exp: "v1/0-0-00002999999999.zip", + num: 2, + regenCount: 2, + exp: "v1/2-2-00002.zip", }, { + name: "regen_3", m: &model.ExportBatch{ FilenameRoot: "v2", StartTimestamp: time.Unix(100, 0), EndTimestamp: time.Unix(300, 0), }, - num: 1, - sha: "", - exp: "v2/100-300-00001999999999.zip", + num: 1, + regenCount: 3, + exp: "v2/103-303-00001.zip", }, } for _, tc := range cases { tc := tc - t.Run(tc.sha, func(t *testing.T) { + t.Run(tc.name, func(t *testing.T) { t.Parallel() - if got, want := exportFilename(tc.m, tc.num, tc.sha), tc.exp; got != want { - t.Errorf("expected %q to be %q", got, want) - } - }) - } -} - -func TestToASCIISortable(t *testing.T) { - 
t.Parallel() - - cases := []struct { - in string - out string - }{ - {"foo", "102111111"}, - {"bar", "098097114"}, - {"ad3e93", "097100051101057051"}, - } - - for _, tc := range cases { - tc := tc - - t.Run(tc.in, func(t *testing.T) { - t.Parallel() - - if got, want := toASCIISortable(tc.in), tc.out; got != want { + if got, want := exportFilename(tc.m, tc.num, tc.regenCount), tc.exp; got != want { t.Errorf("expected %q to be %q", got, want) } }) diff --git a/tools/export-generate/main.go b/tools/export-generate/main.go index 8edf2a12d..117c0d8b1 100644 --- a/tools/export-generate/main.go +++ b/tools/export-generate/main.go @@ -205,11 +205,11 @@ func (e *exportFileWriter) writeFile() { SignatureInfo: signatureInfo, Signer: e.privateKey, } - data, hexSHA, err := export.MarshalExportFile(e.exportBatch, e.exposures, e.revisions, e.curBatch, e.numBatches, []*export.Signer{signer}) + data, err := export.MarshalExportFile(e.exportBatch, e.exposures, e.revisions, e.curBatch, e.numBatches, []*export.Signer{signer}) if err != nil { log.Fatalf("error marshaling export file: %v", err) } - fileName := fmt.Sprintf(e.exportBatch.FilenameRoot+"%d-records-%d-of-%d-%s"+filenameSuffix, e.totalKeys, e.curBatch, e.numBatches, hexSHA[0:6]) + fileName := fmt.Sprintf(e.exportBatch.FilenameRoot+"%d-records-%d-of-%d"+filenameSuffix, e.totalKeys, e.curBatch, e.numBatches) log.Printf("Creating %v", fileName) err = ioutil.WriteFile(fileName, data, 0666) if err != nil {