-
Notifications
You must be signed in to change notification settings - Fork 0
/
select_low_pileup.py
executable file
·128 lines (106 loc) · 5.08 KB
/
select_low_pileup.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
#!/usr/bin/env python
import argparse
import csv
import json
import os
import subprocess
import sys
# This script will take an input JSON file and return the subset of that file for which the pileup is less
# than a specified value. Note that no selection on STABLE BEAMS or anything else is applied, so if you want
# that kind of selection, apply it to the input JSON before feeding it to this script (or just modify the
# brilcalc call below).
# IMPORTANT NOTE: The output will discard any low-pileup periods which are just one LS long, since these are
# assumed to be the bottom of emittance scans rather than any actual useful low-pileup data. If you actually
# want to include these go ahead and change the logic in add_to_list below.
# This requires that brilcalc be in your environment. If you're running on lxplus (recommended),
# export PATH=$HOME/.local/bin:/afs/cern.ch/cms/lumi/brilconda-1.1.7/bin:$PATH
# Otherwise, see the instructions in https://cms-service-lumi.web.cern.ch/cms-service-lumi/brilwsdoc.html
# Default pileup threshold: lumisections whose pileup is strictly below this value are kept. Can be
# overridden on the command line with -t/--threshold.
pileup_threshold = 5.0

# Input normtag to use. The exact details probably don't matter much as long as it covers the whole input
# period.
normtag_file = "/cvmfs/cms-bril.cern.ch/cms-lumi-pog/Normtags/normtag_PHYSICS.json"

# This stores the run/lumisection numbers to keep. This is stored as a dictionary where the key is the run
# number and the value is the set of lumisections present for that run.
keep_ls = {}

# Parse input arguments.
parser = argparse.ArgumentParser()
parser.add_argument('-t', '--threshold', metavar='THRESHOLD', help='Set threshold value of pileup below which lumisections will be kept.', type=float)
parser.add_argument('infile', help='Input JSON file to process.')
args = parser.parse_args()
infile = args.infile

# argparse leaves the option as None when it is not given. Compare against None rather than testing
# truthiness, so that an explicitly requested threshold of 0 is honoured instead of being silently
# replaced by the default.
if args.threshold is not None:
    pileup_threshold = args.threshold
# Execute brilcalc using the input JSON file. The per-lumisection results are written to a temporary CSV
# file in the current directory, which is parsed and then removed later in the script.
brilcalc_output = "temp_brilcalc.csv"
print("Getting data from brilcalc, please wait a moment...")

# Pass the arguments as a list (no shell involved) so that input paths containing spaces or shell
# metacharacters are handed to brilcalc verbatim.
brilcalc_command = ['brilcalc', 'lumi', '-i', infile, '--normtag', normtag_file, '--byls', '-o', brilcalc_output]
try:
    brilcalc_status = subprocess.call(brilcalc_command)
except OSError as err:
    # Raised when the brilcalc executable cannot be started at all (e.g. it is not in PATH).
    sys.exit("Could not run brilcalc (" + str(err) + "). Is it in your PATH?")

# Stop here on failure rather than going on to parse a missing (or stale, left over from an earlier run)
# output file.
if brilcalc_status != 0:
    sys.exit("brilcalc exited with status " + str(brilcalc_status) + "; no output was produced.")
# Parse the brilcalc output. Each data row is read as: column 0 = "run:fill", column 1 = "ls:ls",
# column 7 = pileup.
# NOTE(review): the column positions are hard-coded to match brilcalc's --byls CSV layout; confirm them if
# the brilcalc output format ever changes.
with open(brilcalc_output) as csv_input:
    reader = csv.reader(csv_input, delimiter=',')
    for row in reader:
        # Skip blank lines (csv.reader yields an empty list for them) as well as comment/header lines.
        # Checking with startswith also avoids an IndexError when the first field is empty.
        if not row or row[0].startswith('#'):
            continue
        runfill = row[0].split(":")
        lsls = row[1].split(":")
        run = int(runfill[0])
        ls = int(lsls[0])
        pileup = float(row[7])
        # If we want to keep it, store it in the dictionary. Storing it as a set also protects us against the
        # LSes in the output being out of order (which they may be in some cases).
        if pileup < pileup_threshold:
            keep_ls.setdefault(run, set()).add(ls)
# All lumisections have been collected, so the remaining work is producing the output. The first step is
# to turn the kept lumisections into [first, last] ranges. The approach is adapted from
# doFillValidation.py (simplified, since there is only one luminometer to deal with here).

# State for the range-building pass; -1 means "nothing seen yet".
lastRun = -1
startLS = -1
lastLS = -1

# Final result: run number (as a string) -> list of [first LS, last LS] ranges.
output_json = {}


def add_to_list(run, startLS, lastLS):
    """Record the lumisection range [startLS, lastLS] for run in output_json.

    Ranges consisting of a single lumisection are dropped: they are most likely just the bottom of an
    emittance scan and not useful low-pileup data.
    """
    if startLS == lastLS:
        return
    output_json.setdefault(run, []).append([startLS, lastLS])
# Walk the kept lumisections in increasing (run, LS) order and collapse consecutive LS numbers within a
# run into ranges. lastRun/lastLS hold the previously visited lumisection and startLS the first LS of the
# range currently being built; -1 in any of them means nothing has been seen yet.
for r in sorted(keep_ls):
    for ls in sorted(keep_ls[r]):
        run_changed = lastRun != -1 and r != lastRun
        ls_gap = lastLS != -1 and ls != lastLS + 1
        if run_changed or ls_gap:
            # The open range ended at the previous lumisection: save it and start a new one here.
            add_to_list(str(lastRun), startLS, lastLS)
            startLS = ls
        elif startLS == -1:
            # Very first lumisection overall: it opens the first range.
            startLS = ls
        lastRun, lastLS = r, ls

# Don't forget the end! However if we got nothing at all, then do forget the end.
if lastRun != -1:
    add_to_list(str(lastRun), startLS, lastLS)
# Derive the output file name from the input's base name: "_lowPU" is appended to the dot-separated field
# just before the last one (i.e. right before the extension), or to the whole name if it contains no dot.
file_fields = os.path.basename(infile).split(".")
suffix_index = -2 if len(file_fields) >= 2 else 0
file_fields[suffix_index] = file_fields[suffix_index] + "_lowPU"
outfile_name = ".".join(file_fields)
# Unfortunately json.dump only has two kinds of formatting: either everything on one line,
# or else every single list element on its own line, both of which are rather difficult to
# read. So instead iterate over the dictionary ourselves and use json.dumps to format each
# element (one run per line). Not the most elegant solution in the world, but it works.
output_lines = []
# The keys are run numbers stored as strings; sort them numerically so that runs with a different number
# of digits still come out in run order.
for r in sorted(output_json, key=int):
    output_lines.append(json.dumps(r) + ": " + json.dumps(output_json[r]))

# The with statement closes the file on exit, so no explicit close() is needed afterwards.
with open(outfile_name, "w") as outfile:
    outfile.write("{\n")
    outfile.write(",\n".join(output_lines))
    outfile.write("\n}\n")

# Don't forget to clean up the temporary brilcalc output!
os.unlink(brilcalc_output)
print("Output JSON written to "+outfile_name+".")