forked from cryptpad/cryptpad
-
Notifications
You must be signed in to change notification settings - Fork 0
/
pinneddata.js
284 lines (257 loc) · 11.1 KB
/
pinneddata.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
/* jshint esversion: 6, node: true */
const Fs = require('fs');
const Semaphore = require('saferphore');
const nThen = require('nthen');
/*
takes contents of a pinFile (UTF8 string)
and the pin file's name
returns an array of of channel ids which are pinned
throw errors on pin logs with invalid pin data
*/
const hashesFromPinFile = (pinFile, fileName) => {
var pins = {};
pinFile.split('\n').filter((x)=>(x)).map((l) => JSON.parse(l)).forEach((l) => {
switch (l[0]) {
case 'RESET': {
pins = {};
if (l[1] && l[1].length) { l[1].forEach((x) => { pins[x] = 1; }); }
break;
}
case 'PIN': {
l[1].forEach((x) => { pins[x] = 1; });
break;
}
case 'UNPIN': {
l[1].forEach((x) => { delete pins[x]; });
break;
}
default: throw new Error(JSON.stringify(l) + ' ' + fileName);
}
});
return Object.keys(pins);
};
/*
takes an array of pinned file names
and a global map of stats indexed by public keys
returns the sum of the size of those pinned files
*/
const sizeForHashes = (hashes, dsFileStats) => {
let sum = 0;
hashes.forEach((h) => {
const s = dsFileStats[h];
if (typeof(s) !== 'object' || typeof(s.size) !== 'number') {
//console.log('missing ' + h + ' ' + typeof(s));
} else {
sum += s.size;
}
});
return sum;
};
// do twenty things at a time
const sema = Semaphore.create(20);
let dirList;
const fileList = []; // array which we reuse for a lot of things
const dsFileStats = {}; // map of stats
const out = []; // what we return at the end
const pinned = {}; // map of pinned files
// define a function: 'load' which takes a config
// and a callback
module.exports.load = function (config, cb) {
nThen((waitFor) => {
// read the subdirectories in the datastore
Fs.readdir('./datastore', waitFor((err, list) => {
if (err) { throw err; }
dirList = list;
}));
}).nThen((waitFor) => {
// iterate over all subdirectories
dirList.forEach((f) => {
// process twenty subdirectories simultaneously
sema.take((returnAfter) => {
// get the list of files in every subdirectory
// and push them to 'fileList'
Fs.readdir('./datastore/' + f, waitFor(returnAfter((err, list2) => {
if (err) { throw err; }
list2.forEach((ff) => { fileList.push('./datastore/' + f + '/' + ff); });
})));
});
});
}).nThen((waitFor) => {
// read the subdirectories in 'blob'
Fs.readdir('./blob', waitFor((err, list) => {
if (err) { throw err; }
// overwrite dirList
dirList = list;
}));
}).nThen((waitFor) => {
// iterate over all subdirectories
dirList.forEach((f) => {
// process twenty subdirectories simultaneously
sema.take((returnAfter) => {
// get the list of files in every subdirectory
// and push them to 'fileList'
Fs.readdir('./blob/' + f, waitFor(returnAfter((err, list2) => {
if (err) { throw err; }
list2.forEach((ff) => { fileList.push('./blob/' + f + '/' + ff); });
})));
});
});
}).nThen((waitFor) => {
// iterate over the fileList
fileList.forEach((f) => {
// process twenty files simultaneously
sema.take((returnAfter) => {
// get the stats of each files
Fs.stat(f, waitFor(returnAfter((err, st) => {
if (err) { throw err; }
st.filename = f;
// push them to a big map of stats
dsFileStats[f.replace(/^.*\/([^\/\.]*)(\.ndjson)?$/, (all, a) => (a))] = st;
})));
});
});
}).nThen((waitFor) => {
// read the subdirectories in the pinstore
Fs.readdir('./pins', waitFor((err, list) => {
if (err) { throw err; }
dirList = list;
}));
}).nThen((waitFor) => {
// set file list to an empty array
// fileList = [] ??
fileList.splice(0, fileList.length);
dirList.forEach((f) => {
// process twenty directories at a time
sema.take((returnAfter) => {
// get the list of files in every subdirectory
// and push them to 'fileList' (which is empty because we keep reusing it)
Fs.readdir('./pins/' + f, waitFor(returnAfter((err, list2) => {
if (err) { throw err; }
list2.forEach((ff) => { fileList.push('./pins/' + f + '/' + ff); });
})));
});
});
}).nThen((waitFor) => {
// iterate over the list of pin logs
fileList.forEach((f) => {
// twenty at a time
sema.take((returnAfter) => {
// read the full content
Fs.readFile(f, waitFor(returnAfter((err, content) => {
if (err) { throw err; }
// get the list of channels pinned by this log
const hashes = hashesFromPinFile(content.toString('utf8'), f);
if (config.unpinned) {
hashes.forEach((x) => { pinned[x] = 1; });
} else {
// get the size of files pinned by this log
// but only if we're gonna use it
let size = sizeForHashes(hashes, dsFileStats);
// we will return a list of values
// [user_public_key, size_of_files_they_have_pinned]
out.push([f, Math.floor(size / (1024 * 1024))]);
}
})));
});
});
}).nThen(() => {
// handle all the information you've processed so far
if (config.unpinned) {
// the user wants data about what has not been pinned
// by default we concern ourselves with pads and files older than infinity (everything)
let before = Infinity;
// but you can override this with config
if (config.olderthan) {
before = config.olderthan;
// FIXME validate inputs before doing the heavy lifting
if (isNaN(before)) { // make sure the supplied value is a number
return void cb('--olderthan error [' + config.olderthan + '] not a valid date');
}
}
// you can specify a different time for blobs...
let blobsbefore = before;
if (config.blobsolderthan) {
// use the supplied date if it exists
blobsbefore = config.blobsolderthan;
if (isNaN(blobsbefore)) {
return void cb('--blobsolderthan error [' + config.blobsolderthan + '] not a valid date');
}
}
let files = [];
// iterate over all the stats that you've saved
Object.keys(dsFileStats).forEach((f) => {
// we only care about files which are not in the pin map
if (!(f in pinned)) {
// check if it's a blob or a 'pad'
const isBlob = dsFileStats[f].filename.indexOf('.ndjson') === -1;
// if the mtime is newer than the specified value for its file type, ignore this file
if ((+dsFileStats[f].mtime) >= ((isBlob) ? blobsbefore : before)) { return; }
// otherwise push it to the list of files, with its filename, size, and mtime
files.push({
filename: dsFileStats[f].filename,
size: dsFileStats[f].size,
mtime: dsFileStats[f].mtime
});
}
});
// return the list of files
cb(null, files);
} else {
// if you're not in 'unpinned' mode, sort by size (ascending)
out.sort((a,b) => (a[1] - b[1]));
// and return the sorted data
cb(null, out.slice());
}
});
};
// This script can be called directly on its own
// or required as part of another script
if (!module.parent) {
// if no parent, it is being invoked directly
let config = {}; // build the config from command line arguments...
// --unpinned gets the list of unpinned files
// if you don't pass this, it will list the size of pinned data per user
if (process.argv.indexOf('--unpinned') > -1) { config.unpinned = true; }
// '--olderthan' must be used in conjunction with '--unpinned'
// if you pass '--olderthan' with a string date or number, it will limit
// results only to pads older than the supplied time
// it defaults to 'infinity', or no filter at all
const ot = process.argv.indexOf('--olderthan');
if (ot > -1) {
config.olderthan = Number(process.argv[ot+1]) ? new Date(Number(process.argv[ot+1]))
: new Date(process.argv[ot+1]);
}
// '--blobsolderthan' must be used in conjunction with '--unpinned'
// if you pass '--blobsolderthan with a string date or number, it will limit
// results only to blobs older than the supplied time
// it defaults to using the same value passed '--olderthan'
const bot = process.argv.indexOf('--blobsolderthan');
if (bot > -1) {
config.blobsolderthan = Number(process.argv[bot+1]) ? new Date(Number(process.argv[bot+1]))
: new Date(process.argv[bot+1]);
}
// call our big function directly
// pass our constructed configuration and a callback
module.exports.load(config, function (err, data) {
if (err) { throw new Error(err); } // throw errors
if (!Array.isArray(data)) { return; } // if the returned value is not an array, you're done
if (config.unpinned) {
// display the list of unpinned files with their size and mtime
data.forEach((f) => { console.log(f.filename + " " + f.size + " " + (+f.mtime)); });
} else {
// display the list of public keys and the size of the data they have pinned in megabytes
data.forEach((x) => { console.log(x[0] + ' ' + x[1] + ' MB'); });
}
});
}
/* Example usage of this script...
# display the list of public keys and the size of the data the have pinned in megabytes
node pinneddata.js
# display the list of unpinned pads and blobs with their size and mtime
node pinneddata.js --unpinned
# display the list of unpinned pads and blobs older than 12345 with their size and mtime
node pinneddata.js --unpinned --olderthan 12345
# display the list of unpinned pads older than 12345 and unpinned blobs older than 123
# each with their size and mtime
node pinneddata.js --unpinned --olderthan 12345 --blobsolderthan 123
*/