-
Notifications
You must be signed in to change notification settings - Fork 0
/
extractQueriesDgraphRequestLogging.py
72 lines (59 loc) · 2.78 KB
/
extractQueriesDgraphRequestLogging.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
import re
import sys
from collections import defaultdict
######
###### Helps show how many queries are in use, which guides us in migrating, tuning, and estimating complexity.
######
###### This script processes "Got GraphQL queries" lines from Dgraph request logging and does some
###### basic deduplication and counting to identify how many unique queries are run on a system
###### and how many times each one is run.
######
###### Note that this uses very basic heuristics to identify uniqueness and is not perfect. Because callers can
###### dynamically build and tweak queries, it is very difficult to determine classes of similar queries
###### modulo all the additional filters and included fields that may be added, so this will have to do.
######
###### Unfortunately, mutations are not logged (TODO: confirm this), so this script only works for queries.
######
def extract_functions_and_tokens(query):
    """Return a rough fingerprint string for a single logged query.

    The fingerprint is a space-separated string made of:

    * every ``name(func: ...)`` call found in ``query``, each captured only
      up to the first closing parenthesis on the same line (the match is
      lazy and ``.`` does not cross newlines), followed by
    * the first 20 ``word:`` tokens found in ``query``.

    Two queries with the same fingerprint are treated as the same query
    by the rest of the script. This is a heuristic, not an exact parse.
    """
    # Root function calls come first in the fingerprint.
    fingerprint_parts = re.findall(r'\b\w+\(func:.*?\)', query)

    # Colon-terminated words; only the leading 20 are kept so that long
    # queries do not produce unbounded fingerprints.
    colon_tokens = re.findall(r'\b\w+:', query)
    del colon_tokens[20:]

    fingerprint_parts.extend(colon_tokens)
    return ' '.join(fingerprint_parts)
def process_queries(file_path, max_queries):
    """Group the queries in a log file by fingerprint and count them.

    The whole file is read into memory and split on the ``'-----\\n'``
    delimiter. The text before the first delimiter is discarded (it is
    expected to be empty), and every remaining chunk is treated as one query.

    Args:
        file_path: Path of the log file to read.
        max_queries: Maximum number of sample queries to keep per
            fingerprint. This only limits the stored samples; every query
            is still counted.

    Returns:
        A ``(fingerprints, counts)`` tuple where ``fingerprints`` maps each
        fingerprint to a list of at most ``max_queries`` sample query texts
        and ``counts`` maps each fingerprint to the total number of queries
        that produced it.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # Decode explicitly as UTF-8 instead of relying on the platform locale,
    # and replace undecodable bytes so one stray byte in a large log does not
    # abort the whole analysis.
    with open(file_path, 'r', encoding='utf-8', errors='replace') as file:
        content = file.read()

    # Splitting the file content into individual queries.
    queries = content.split('-----\n')[1:]  # Skip the first split as it will be empty

    fingerprints = defaultdict(list)
    counts = defaultdict(int)

    for query in queries:
        fingerprint = extract_functions_and_tokens(query)
        counts[fingerprint] += 1
        # Indexing the defaultdict registers the fingerprint even when no
        # sample is stored (e.g. max_queries <= 0), so it still gets reported.
        samples = fingerprints[fingerprint]
        if len(samples) < max_queries:
            samples.append(query)

    return fingerprints, counts
def main():
    """Command-line entry point.

    Expects two arguments: the log file name and the maximum number of
    sample queries to keep per fingerprint. Prints a usage or error message
    and exits with status 1 when an argument is missing or the second one
    is not an integer. Otherwise prints one report section per fingerprint,
    most frequent first.
    """
    argv = sys.argv
    if len(argv) < 3:
        print("Usage: python script.py <filename> <max_queries_per_fingerprint>")
        sys.exit(1)

    file_path = argv[1]
    try:
        max_queries = int(argv[2])
    except ValueError:
        print("Please provide a valid integer for max_queries_per_fingerprint.")
        sys.exit(1)

    fingerprints, counts = process_queries(file_path, max_queries)

    # Rank fingerprints from most to least frequent; the sort is stable, so
    # ties keep the order in which they were first seen.
    ranked = sorted(fingerprints, key=lambda fp: counts[fp], reverse=True)

    for fp in ranked:
        print(f"Fingerprint: {fp}")
        print(f"Count: {counts[fp]}")
        print("Sample Queries:")
        for sample in fingerprints[fp]:
            print(sample)
        # One separator per fingerprint section.
        print("\n----------------------\n")
# Run the report only when executed as a script, not when imported.
if __name__ == "__main__":
    main()