
Merge pull request #5818 from sagemathinc/huge-syncstrings
db/syncstrings: mark syncstrings with too large patches as "huge"
williamstein authored Apr 1, 2022
2 parents cd99057 + a57cd96 commit 65251d7
Showing 2 changed files with 47 additions and 16 deletions.
59 changes: 43 additions & 16 deletions src/packages/database/postgres-blobs.coffee
@@ -609,7 +609,7 @@ exports.extend_PostgreSQL = (ext) -> class PostgreSQL extends ext
dbg("determine inactive syncstring ids")
@_query
query : 'SELECT string_id FROM syncstrings'
where : [{'last_active <= $::TIMESTAMP' : misc.days_ago(opts.age_days)}, 'archived IS NULL']
where : [{'last_active <= $::TIMESTAMP' : misc.days_ago(opts.age_days)}, 'archived IS NULL', 'huge IS NOT TRUE']
limit : opts.limit
timeout_s : TIMEOUT_LONG_S
cb : all_results 'string_id', (err, v) =>
@@ -641,6 +641,9 @@ exports.extend_PostgreSQL = (ext) -> class PostgreSQL extends ext

# Offlines and archives the patch, unless the string is active very recently, in
# which case this is a no-op.
#
# TODO: this ignores all syncstrings marked as "huge:true", because the patches are too large.
# Come up with a better strategy (incremental?) for generating the blobs, to avoid the problem.
archive_patches: (opts) =>
opts = defaults opts,
string_id : required
@@ -649,13 +652,13 @@ exports.extend_PostgreSQL = (ext) -> class PostgreSQL extends ext
cutoff : misc.minutes_ago(30) # never touch anything this new
cb : undefined
dbg = @_dbg("archive_patches(string_id='#{opts.string_id}')")
syncstring = patches = blob_uuid = project_id = last_active = undefined
syncstring = patches = blob_uuid = project_id = last_active = huge = undefined
where = {"string_id = $::CHAR(40)" : opts.string_id}
async.series([
(cb) =>
dbg("get project_id")
dbg("get syncstring info")
@_query
query : "SELECT project_id, archived, last_active FROM syncstrings"
query : "SELECT project_id, archived, last_active, huge FROM syncstrings"
where : where
cb : one_result (err, x) =>
if err
@@ -667,11 +670,16 @@ exports.extend_PostgreSQL = (ext) -> class PostgreSQL extends ext
else
project_id = x.project_id
last_active = x.last_active
huge = !!x.huge
dbg("got last_active=#{last_active} project_id=#{project_id} huge=#{huge}")
cb()
(cb) =>
if last_active? and last_active >= opts.cutoff
dbg("excluding due to cutoff")
cb(); return
if huge
dbg("excluding due to being huge")
cb(); return
dbg("get patches")
@export_patches
string_id : opts.string_id
@@ -681,6 +689,8 @@ exports.extend_PostgreSQL = (ext) -> class PostgreSQL extends ext
(cb) =>
if last_active? and last_active >= opts.cutoff
cb(); return
if huge
cb(); return
dbg("create blob from patches")
try
blob = Buffer.from(JSON.stringify(patches))
@@ -689,20 +699,30 @@ exports.extend_PostgreSQL = (ext) -> class PostgreSQL extends ext
# need to break patches up...
# This is not exactly the end of the world as the entire point of all this is to
# just save some space in the database...
cb(err)
return
dbg('save blob')
blob_uuid = misc_node.uuidsha1(blob)
@save_blob
uuid : blob_uuid
blob : blob
project_id : project_id
compress : opts.compress
level : opts.level
cb : cb
dbg('error creating blob, marking syncstring as being "huge": ' + err)
huge = true
@_query
query : "UPDATE syncstrings"
set : {huge : true}
where : where
cb : (err) =>
cb(err)
return
if not huge
dbg('save blob')
blob_uuid = misc_node.uuidsha1(blob)
@save_blob
uuid : blob_uuid
blob : blob
project_id : project_id
compress : opts.compress
level : opts.level
cb : cb
(cb) =>
if last_active? and last_active >= opts.cutoff
cb(); return
if huge
cb(); return
dbg("update syncstring to indicate patches have been archived in a blob")
@_query
query : "UPDATE syncstrings"
@@ -712,6 +732,8 @@ exports.extend_PostgreSQL = (ext) -> class PostgreSQL extends ext
(cb) =>
if last_active? and last_active >= opts.cutoff
cb(); return
if huge
cb(); return
dbg("actually deleting patches")
delete_patches(db:@, string_id: opts.string_id, cb:cb)
], (err) => opts.cb?(err))
@@ -789,7 +811,12 @@ exports.extend_PostgreSQL = (ext) -> class PostgreSQL extends ext
opts.cb(err)
else
for p in patches
p.time = new Date(p.epoch)
# TODO: why use epoch and then convert it to a Date, rather than just taking time?
# Besides that: @hsy noticed in development that p.epoch could be a string, resulting in an invalid date.
if typeof p.epoch == 'string'
p.time = new Date(parseInt(p.epoch))
else
p.time = new Date(p.epoch)
delete p.epoch
opts.cb(undefined, patches)
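
A condensed, hypothetical standalone sketch of the control flow added to archive_patches above (the markHuge helper, the sample data, and the console logging are assumptions for illustration, not code from this commit): packing an entire patch history into one JSON blob can make JSON.stringify throw, e.g. a RangeError once the serialized output exceeds V8's maximum string length, so instead of failing the archive pass the syncstring is now marked huge and skipped on later passes.

# Hypothetical standalone sketch -- not part of the commit.
markHuge = (stringId, cb) ->
    # Stand-in for the @_query call that runs:
    #   UPDATE syncstrings SET huge=true WHERE string_id=$1
    console.log("UPDATE syncstrings SET huge=true WHERE string_id='#{stringId}'")
    cb()

archiveOnePass = (stringId, patches, cb) ->
    try
        # The whole patch history is serialized into a single JSON blob; for enormous
        # histories JSON.stringify can throw (e.g. "RangeError: Invalid string length").
        blob = Buffer.from(JSON.stringify(patches))
    catch err
        console.log("error creating blob, marking syncstring as huge:", err.message)
        markHuge(stringId, cb)
        return
    # ...would continue as archive_patches does above: save_blob, point the
    # syncstrings row at the blob uuid, then delete the raw patch rows.
    console.log("created blob with #{blob.length} bytes")
    cb()

archiveOnePass('demo-string-id', [{time: Date.now(), patch: 'small change'}], (err) -> console.log('archive pass finished', err ? 'ok'))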

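The epoch fix in the last hunk above guards against p.epoch arriving as a string: passing a string of digits directly to the Date constructor produces an Invalid Date, while converting it to a number first does not. A minimal hypothetical illustration (the sample timestamp is an assumption, not taken from the commit):

# Hypothetical illustration -- not part of the commit.
epochNumber = 1648771200000        # 2022-04-01T00:00:00Z in milliseconds
epochString = '1648771200000'      # the same value, but delivered as a string

console.log(new Date(epochNumber))             # valid Date
console.log(new Date(epochString))             # Invalid Date -- the problem @hsy observed
console.log(new Date(parseInt(epochString)))   # valid Date -- what the fix above does
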
4 changes: 4 additions & 0 deletions src/packages/util/syncstring_schema.js
@@ -88,6 +88,10 @@ schema.syncstrings = {
desc:
"Shared (by all users) configuration settings for editing this file (e.g., which spellcheck language to use).",
},
huge: {
type: "boolean",
desc: "If true, this syncstring contains too many or too large patches to be processed. Hence if this is set, it won't be processed. TODO: implement a better archiving mechanism and then process such 'huge' syncstrings.",
}
},

pg_indexes: ["last_active", "archived"],
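
For context on how the new column is consumed: the selection query in the first hunk of postgres-blobs.coffee filters with huge IS NOT TRUE rather than huge = false, so pre-existing rows, where the new column is NULL, still qualify for archiving. A hypothetical standalone sketch of that query using the node pg client (the helper name, connection handling, and interval formatting are assumptions, not part of the commit):

# Hypothetical standalone sketch -- not part of the commit.
{Client} = require('pg')

selectInactiveSyncstrings = (ageDays, limit, cb) ->
    client = new Client()   # connection settings come from the usual PG* environment variables
    client.connect (err) ->
        return cb(err) if err
        query = """
            SELECT string_id FROM syncstrings
            WHERE last_active <= NOW() - $1::INTERVAL
              AND archived IS NULL
              AND huge IS NOT TRUE   -- skips huge=true, keeps huge=NULL (pre-existing rows)
            LIMIT $2
            """
        client.query query, ["#{ageDays} days", limit], (err, res) ->
            client.end()
            cb(err, res?.rows.map((r) -> r.string_id))

# Example: syncstrings inactive for at least 30 days, at most 1000 of them.
selectInactiveSyncstrings 30, 1000, (err, ids) ->
    console.log(err ? "candidates for archiving: #{ids.length}")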
