-
Notifications
You must be signed in to change notification settings - Fork 0
/
big_data_analysis.py
126 lines (108 loc) · 4.69 KB
/
big_data_analysis.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
from datetime import datetime, timezone
from dev import data as D
from dev import utils
import pandas as pd
import numpy as np
from scipy import stats
import time, random, requests, pymongo, threading, sys
from matplotlib import pyplot
class Main:
    """Ad-hoc analysis helpers over the game's MongoDB ``players``/``rooms`` data.

    Constructing an instance loads ``./config.json`` and opens the MongoDB
    client named there. The reporting methods print their results to stdout;
    ``getstats`` additionally writes ``datastats.json``, which ``analyze``
    reads back.
    """

    def __init__(self):
        """Load the configuration, open the database and bind the collections."""
        self.config = D.Config.from_dict(utils.load_json("./config.json"))
        # presumably a MongoDB connection URI -- confirm against D.Config
        self.client = pymongo.MongoClient(self.config.mongodb_client)
        self.setup_database()
        self.players = self.db["players"]
        self.rooms = self.db["rooms"]
        # Per-player stat fields aggregated by getstats() and checked by analyze().
        self.dataTargets = [
            "level", "matchesWon", "matchesLost", "kills", "teamKills",
            "arrests", "deaths", "networkHacks", "captures", "pickups",
            "teamDamage",
        ]

    def setup_database(self):
        """Select the configured database and create any missing collections."""
        self.db = self.client[self.config.dbname]
        # Ask the server for the collection list once instead of once per name.
        existing = set(self.db.list_collection_names())
        for collection_name in ("rooms", "players", "maps"):
            if collection_name not in existing:
                self.db.create_collection(collection_name)

    def normalize(self, stat, level):
        """Return *stat* divided by *level*, treating levels below 1 as 1.

        The clamp avoids a division by zero for level-0 players.
        """
        return stat / max(level, 1)

    def _find_player(self, key):
        """Return the first player document matching *key*.

        Raises:
            LookupError: if no player matches.
        """
        player = self.players.find_one(key)
        if player is None:
            raise LookupError(f"no player matches {key!r}")
        return player

    @staticmethod
    def _summarize(values):
        """Return range/mean/median/mode/max/min of *values* as plain floats.

        The mode is the most frequent value; ties resolve to the smallest
        such value. For samples with no repeated value this is the minimum.

        Raises:
            ValueError: if *values* is empty.
        """
        sample = np.asarray(values, dtype=float)
        if sample.size == 0:
            raise ValueError("cannot summarize an empty sample")
        low = float(sample.min())
        high = float(sample.max())
        # np.unique returns sorted uniques, so argmax picks the smallest mode.
        uniques, counts = np.unique(sample, return_counts=True)
        return {
            "range": high - low,
            "mean": float(sample.mean()),
            "median": float(np.median(sample)),
            "mode": float(uniques[np.argmax(counts)]),
            "max": high,
            "min": low,
        }

    def getstats(self, min_level=10):
        """Aggregate level-normalized stats over all players and save them.

        Players without stats, or whose level is below *min_level*, are
        ignored. For every field in ``self.dataTargets`` the summary produced
        by ``_summarize`` is printed and the whole mapping is written to
        ``datastats.json``.

        Args:
            min_level: lowest player level included in the sample.

        Raises:
            ValueError: if no player qualifies, so there is nothing to summarize.
        """
        targets = self.dataTargets
        samples = {target: [] for target in targets}
        for player in self.players.find():
            player_stats = player.get("stats")
            if player_stats is None or player_stats["level"] < min_level:
                continue
            level = player_stats["level"]
            for target in targets:
                samples[target].append(self.normalize(player_stats[target], level))
        print("Raw data collected.")

        if not all(samples.values()):
            raise ValueError(
                f"no players at or above level {min_level} have stats; "
                "nothing to summarize"
            )

        dataStats = {target: self._summarize(samples[target]) for target in targets}
        for name, summary in dataStats.items():
            print(name + ":", summary)
        utils.save_json("datastats.json", dataStats)

    def analyze(self, key):
        """Print every stat of one player that is well above the population mean.

        Loads ``datastats.json`` (as written by ``getstats``) into
        ``self.dataStats`` and compares the player's level-normalized stats
        against it using ``above_average`` with ``d=2``.

        Args:
            key: MongoDB filter identifying the player.

        Raises:
            LookupError: if no player matches *key* or the player has no stats.
        """
        self.dataStats = utils.load_json("datastats.json")
        player = self._find_player(key)
        target_data = player.get("stats")
        if target_data is None:
            raise LookupError(f"player matching {key!r} has no stats")
        # The loop variable is not called "stats": that would shadow the
        # scipy.stats module imported by this file.
        for target, summary in self.dataStats.items():
            value = self.normalize(target_data[target], target_data["level"])
            # "mean" is the population average; "mode" is a separate statistic.
            average = summary["mean"]
            if self.above_average(value, summary["max"], average, d=2):
                print(player['name'], ' --- ', target, "is above average. ", target_data[target], "| normalized:", value, "| average:", average)

    def above_average(self, value, maxi, avg, d=2):
        """Return True if *value* exceeds ``avg + (maxi - avg) / d``.

        With the default ``d=2`` the threshold is halfway between the average
        and the maximum.
        """
        return value > (avg + (maxi - avg) / d)

    def below_average(self, value, mini, avg, d=2):
        """Return True if *value* is below ``avg - (avg - mini) / d``.

        With the default ``d=2`` the threshold is halfway between the minimum
        and the average.
        """
        return value < (avg - (avg - mini) / d)

    def when_played(self, key):
        """Print a chronological join/leave log for one player.

        Scans every room that has both a ``joinLog`` and a ``leaveLog`` and
        collects the entries whose ``value`` equals the player's ``_id``.

        Args:
            key: MongoDB filter identifying the player.

        Raises:
            LookupError: if no player matches *key*.
        """
        player = self._find_player(key)
        player_id = player["_id"]
        log = []
        rooms_with_logs = {"joinLog": {"$exists": True}, "leaveLog": {"$exists": True}}
        for room in self.rooms.find(rooms_with_logs):
            for field, action in (("joinLog", "join"), ("leaveLog", "leave")):
                for entry in room[field]:
                    if entry["value"] == player_id:
                        # utils.local presumably converts the stored timestamp
                        # to a local datetime -- confirm in dev.utils.
                        # The room name comes from the document already in
                        # hand, avoiding one extra query per printed line.
                        log.append((utils.local(entry["timestamp"]), action, room["name"]))
        # Oldest first. Sorting on the timestamp itself works for naive and
        # timezone-aware values alike.
        log.sort(key=lambda entry: entry[0])
        print(f"Activity log for {player['name']}")
        for when, action, room_name in log:
            # Portable spelling of "%D, %T", which not every platform supports.
            print(when.strftime("%m/%d/%y, %H:%M:%S"), "\t", action, "\t", room_name)

    def who_has_briefcase(self, roomId):
        """Print which agent in a room most likely holds the briefcase.

        Agents are ranked by capture rate: captures per pickup as a
        percentage truncated to one decimal. Agents that cannot be found or
        have no stats are skipped.

        Args:
            roomId: ``_id`` of the room to inspect.

        Raises:
            LookupError: if the room does not exist.
        """
        room = self.rooms.find_one({"_id": roomId})
        if room is None:
            raise LookupError(f"no room with _id {roomId!r}")
        candidates = []
        for agent_id in room["currentAgents"]:
            agent = self.players.find_one({"_id": agent_id})
            agent_stats = agent.get("stats") if agent is not None else None
            if agent_stats is None:
                continue
            # max(..., 1) guards against agents who never picked it up.
            capture_rate = int(agent_stats["captures"] / max(agent_stats["pickups"], 1) * 1000) / 10
            candidates.append((agent, capture_rate))
        if not candidates:
            print("No agents with recorded stats are currently in this room.")
            return
        # max() keeps the first agent among ties, like a stable descending sort.
        player, capture_rate = max(candidates, key=lambda candidate: candidate[1])
        print(player['name'], f"(level {player['stats']['level']})", f"most likely has the briefcase. They've picked up the briefcase {player['stats']['pickups']} times with a capture success rate of {capture_rate}%.")

    def get_timeline(self, data: list, freq="6min"):
        """Resample timestamped samples onto a regular grid.

        Args:
            data: mappings with ``"timestamp"`` and ``"value"`` keys; the
                timestamps must be datetime-like for ``resample`` to work.
            freq: pandas offset alias for the grid spacing. The default is
                six minutes (the same spacing as the former ``".1H"``).

        Returns:
            A pandas Series indexed by the regular grid, forward-filled from
            the most recent sample.
        """
        timestamps = [sample["timestamp"] for sample in data]
        values = [sample["value"] for sample in data]
        # NOTE(review): drop_duplicates() removes repeated *values*, not
        # repeated timestamps, so a value that recurs later is discarded.
        # Kept as in the original -- confirm this is intended.
        series = pd.Series(values, timestamps).drop_duplicates()
        return series.resample(freq).ffill().interpolate()
# Ad-hoc driver: build the analyzer (which opens the MongoDB client) and
# report the above-average stats of a single player.
m = Main()
m.analyze(key={"name": "bopman15"})

# Other invocations kept for reference; uncomment as needed.
#m.when_played({"name": "Stoned Cookie"})
#m.who_has_briefcase(76555)
#m.getstats()
#for x in m.players.find({"stats.level": {"$lt": 5}, "firstLogin": {"$gt": "2021-04-15"}}):
#    m.analyze({"_id": x["_id"]})