
Merge pull request #5818 from sagemathinc/huge-syncstrings
db/syncstrings: mark syncstrings with too large patches as "huge"
williamstein authored Apr 1, 2022
2 parents cd99057 + a57cd96 commit 65251d7
Showing 2 changed files with 47 additions and 16 deletions.
59 changes: 43 additions & 16 deletions src/packages/database/postgres-blobs.coffee
@@ -609,7 +609,7 @@ exports.extend_PostgreSQL = (ext) -> class PostgreSQL extends ext
dbg("determine inactive syncstring ids")
@_query
query : 'SELECT string_id FROM syncstrings'
where : [{'last_active <= $::TIMESTAMP' : misc.days_ago(opts.age_days)}, 'archived IS NULL']
where : [{'last_active <= $::TIMESTAMP' : misc.days_ago(opts.age_days)}, 'archived IS NULL', 'huge IS NOT TRUE']
limit : opts.limit
timeout_s : TIMEOUT_LONG_S
cb : all_results 'string_id', (err, v) =>
@@ -641,6 +641,9 @@ exports.extend_PostgreSQL = (ext) -> class PostgreSQL extends ext

# Offlines and archives the patch, unless the string is active very recently, in
# which case this is a no-op.
#
# TODO: this ignores all syncstrings marked as "huge:true", because the patches are too large.
# Come up with a better strategy (incremental?) for generating the blobs, to avoid the problem.
archive_patches: (opts) =>
opts = defaults opts,
string_id : required
@@ -649,13 +652,13 @@ exports.extend_PostgreSQL = (ext) -> class PostgreSQL extends ext
cutoff : misc.minutes_ago(30) # never touch anything this new
cb : undefined
dbg = @_dbg("archive_patches(string_id='#{opts.string_id}')")
syncstring = patches = blob_uuid = project_id = last_active = undefined
syncstring = patches = blob_uuid = project_id = last_active = huge = undefined
where = {"string_id = $::CHAR(40)" : opts.string_id}
async.series([
(cb) =>
dbg("get project_id")
dbg("get syncstring info")
@_query
query : "SELECT project_id, archived, last_active FROM syncstrings"
query : "SELECT project_id, archived, last_active, huge FROM syncstrings"
where : where
cb : one_result (err, x) =>
if err
@@ -667,11 +670,16 @@ exports.extend_PostgreSQL = (ext) -> class PostgreSQL extends ext
else
project_id = x.project_id
last_active = x.last_active
huge = !!x.huge
dbg("got last_active=#{last_active} project_id=#{project_id} huge=#{huge}")
cb()
(cb) =>
if last_active? and last_active >= opts.cutoff
dbg("excluding due to cutoff")
cb(); return
if huge
dbg("excluding due to being huge")
cb(); return
dbg("get patches")
@export_patches
string_id : opts.string_id
@@ -681,6 +689,8 @@ exports.extend_PostgreSQL = (ext) -> class PostgreSQL extends ext
(cb) =>
if last_active? and last_active >= opts.cutoff
cb(); return
if huge
cb(); return
dbg("create blob from patches")
try
blob = Buffer.from(JSON.stringify(patches))
@@ -689,20 +699,30 @@ exports.extend_PostgreSQL = (ext) -> class PostgreSQL extends ext
# need to break patches up...
# This is not exactly the end of the world as the entire point of all this is to
# just save some space in the database...
cb(err)
return
dbg('save blob')
blob_uuid = misc_node.uuidsha1(blob)
@save_blob
uuid : blob_uuid
blob : blob
project_id : project_id
compress : opts.compress
level : opts.level
cb : cb
dbg('error creating blob, marking syncstring as being "huge": ' + err)
huge = true
@_query
query : "UPDATE syncstrings"
set : {huge : true}
where : where
cb : (err) =>
cb(err)
return
if not huge
dbg('save blob')
blob_uuid = misc_node.uuidsha1(blob)
@save_blob
uuid : blob_uuid
blob : blob
project_id : project_id
compress : opts.compress
level : opts.level
cb : cb
(cb) =>
if last_active? and last_active >= opts.cutoff
cb(); return
if huge
cb(); return
dbg("update syncstring to indicate patches have been archived in a blob")
@_query
query : "UPDATE syncstrings"
@@ -712,6 +732,8 @@ exports.extend_PostgreSQL = (ext) -> class PostgreSQL extends ext
(cb) =>
if last_active? and last_active >= opts.cutoff
cb(); return
if huge
cb(); return
dbg("actually deleting patches")
delete_patches(db:@, string_id: opts.string_id, cb:cb)
], (err) => opts.cb?(err))
@@ -789,7 +811,12 @@ exports.extend_PostgreSQL = (ext) -> class PostgreSQL extends ext
opts.cb(err)
else
for p in patches
p.time = new Date(p.epoch)
# TODO: why use epoch and then convert it to a Date, rather than just taking time?
# Besides that: @hsy noticed in development that p.epoch could be a string, resulting in an invalid date.
if typeof p.epoch == 'string'
p.time = new Date(parseInt(p.epoch))
else
p.time = new Date(p.epoch)
delete p.epoch
opts.cb(undefined, patches)
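
A condensed, hypothetical standalone sketch of the control flow added to archive_patches above (the markHuge helper, the sample data, and the console logging are assumptions for illustration, not code from this commit): packing an entire patch history into one JSON blob can make JSON.stringify throw, e.g. a RangeError once the serialized output exceeds V8's maximum string length, so instead of failing the archive pass the syncstring is now marked huge and skipped on later passes.

# Hypothetical standalone sketch -- not part of the commit.
markHuge = (stringId, cb) ->
    # Stand-in for the @_query call that runs:
    #   UPDATE syncstrings SET huge=true WHERE string_id=$1
    console.log("UPDATE syncstrings SET huge=true WHERE string_id='#{stringId}'")
    cb()

archiveOnePass = (stringId, patches, cb) ->
    try
        # The whole patch history is serialized into a single JSON blob; for enormous
        # histories JSON.stringify can throw (e.g. "RangeError: Invalid string length").
        blob = Buffer.from(JSON.stringify(patches))
    catch err
        console.log("error creating blob, marking syncstring as huge:", err.message)
        markHuge(stringId, cb)
        return
    # ...would continue as archive_patches does above: save_blob, point the
    # syncstrings row at the blob uuid, then delete the raw patch rows.
    console.log("created blob with #{blob.length} bytes")
    cb()

archiveOnePass('demo-string-id', [{time: Date.now(), patch: 'small change'}], (err) -> console.log('archive pass finished', err ? 'ok'))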

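The epoch fix in the last hunk above guards against p.epoch arriving as a string: passing a string of digits directly to the Date constructor produces an Invalid Date, while converting it to a number first does not. A minimal hypothetical illustration (the sample timestamp is an assumption, not taken from the commit):

# Hypothetical illustration -- not part of the commit.
epochNumber = 1648771200000        # 2022-04-01T00:00:00Z in milliseconds
epochString = '1648771200000'      # the same value, but delivered as a string

console.log(new Date(epochNumber))             # valid Date
console.log(new Date(epochString))             # Invalid Date -- the problem @hsy observed
console.log(new Date(parseInt(epochString)))   # valid Date -- what the fix above does
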
4 changes: 4 additions & 0 deletions src/packages/util/syncstring_schema.js
@@ -88,6 +88,10 @@ schema.syncstrings = {
desc:
"Shared (by all users) configuration settings for editing this file (e.g., which spellcheck language to use).",
},
huge: {
type: "boolean",
desc: "If true, this syncstring contains too many or too large patches to be processed. Hence if this is set, it won't be processed. TODO: implement a better archiving mechanism and then process such 'huge' syncstrings.",
}
},

pg_indexes: ["last_active", "archived"],
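
For context on how the new column is consumed: the selection query in the first hunk of postgres-blobs.coffee filters with huge IS NOT TRUE rather than huge = false, so pre-existing rows, where the new column is NULL, still qualify for archiving. A hypothetical standalone sketch of that query using the node pg client (the helper name, connection handling, and interval formatting are assumptions, not part of the commit):

# Hypothetical standalone sketch -- not part of the commit.
{Client} = require('pg')

selectInactiveSyncstrings = (ageDays, limit, cb) ->
    client = new Client()   # connection settings come from the usual PG* environment variables
    client.connect (err) ->
        return cb(err) if err
        query = """
            SELECT string_id FROM syncstrings
            WHERE last_active <= NOW() - $1::INTERVAL
              AND archived IS NULL
              AND huge IS NOT TRUE   -- skips huge=true, keeps huge=NULL (pre-existing rows)
            LIMIT $2
            """
        client.query query, ["#{ageDays} days", limit], (err, res) ->
            client.end()
            cb(err, res?.rows.map((r) -> r.string_id))

# Example: syncstrings inactive for at least 30 days, at most 1000 of them.
selectInactiveSyncstrings 30, 1000, (err, ids) ->
    console.log(err ? "candidates for archiving: #{ids.length}")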
