-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #14 from cortze/dev
Merge CLI-refactoring
- Loading branch information
Showing
64 changed files
with
11,691 additions
and
5,854 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,349 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"# Analysis of the CID pinging phase\n", | ||
"\n", | ||
" " | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"## Import dependencies\n", | ||
"import sqlalchemy as sa\n", | ||
"import pandas as pd\n", | ||
"import seaborn as sns\n", | ||
"import matplotlib.pyplot as plt\n", | ||
"import numpy as np\n", | ||
"\n", | ||
"## DB Credentials\n", | ||
"HOST=\"localhost\"\n", | ||
"PORT=\"5432\"\n", | ||
"DB=\"hoarder_test\"\n", | ||
"USER=\"hoarder\"\n", | ||
"PASSWD=\"password\"\n", | ||
"\n", | ||
"# Connect to the DB\n", | ||
"engine = sa.create_engine(f'postgresql://{USER}:{PASSWD}@{HOST}:{PORT}/{DB}')\n", | ||
"\n", | ||
"## plotting style\n", | ||
"fig_size= (7,4)\n", | ||
"sns.set_context(\"talk\", font_scale=1)\n" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"## get the mean fetch time of each ping round\n", | ||
"\n", | ||
"sql_query=\"\"\"\n", | ||
" SELECT \n", | ||
" cid_hash,\n", | ||
" ping_round, \n", | ||
" fetch_time\n", | ||
" FROM fetch_results\n", | ||
" ORDER BY ping_round;\n", | ||
"\"\"\"\n", | ||
"ping_rounds = pd.read_sql_query(sql_query, engine)\n", | ||
"\n", | ||
"avg_fetcht = ping_rounds.groupby(\"ping_round\").mean()\n", | ||
"hours_dist = avg_fetcht[\"fetch_time\"].to_numpy()\n", | ||
"\n", | ||
"hours_dist = (hours_dist - hours_dist[0]) / 3600\n", | ||
"print(hours_dist)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"### Track the Activity or Onlineness of the PR Holders\n", | ||
"We divide them into:\n", | ||
"1. Total PR Holders\n", | ||
"2. Only non-hydra PR Holders\n", | ||
"3. Only hydra PR Holders" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"def plot_ping_dist(pd_obj, column_name, opts):\n", | ||
" ## Get the total active peers distribution per ping_round\n", | ||
" pv_table = pd_obj.pivot(index=[\"ping_round\", \"cid_hash\"], columns=column_name, values=\"count\")\n", | ||
" pv_table = pv_table.fillna(0)\n", | ||
" aux = pd.DataFrame(pv_table.to_records())\n", | ||
"\n", | ||
" # make dist\n", | ||
" dist = []\n", | ||
" for i, h in enumerate(hours_dist):\n", | ||
" t = aux.query(f\"ping_round == {i}\")\n", | ||
" dist.append(t[\"True\"])\n", | ||
" \n", | ||
" ## Make a boxplot with the distribution\n", | ||
" fig, ax = plt.subplots(figsize=(12,6))\n", | ||
" ax.boxplot(dist, positions=hours_dist, showfliers=True) \n", | ||
" ticks = np.linspace(0, 36, 10) ###### <---- *UPDATE THIS* to fit the study duration\n", | ||
" plt.xticks(ticks, ticks.astype(int))\n", | ||
" plt.xlabel(\"Time Since Publication (Hours)\")\n", | ||
" plt.ylabel(opts[\"ylabel\"])\n", | ||
" plt.show()" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"## Get the active peers distribution per ping_round\n", | ||
"\n", | ||
"sql_query = \"\"\"\n", | ||
"SELECT \n", | ||
"\tping.cid_hash,\n", | ||
"\tping.ping_round,\n", | ||
"\tping.is_active,\n", | ||
"\tcount(ping.is_active)\n", | ||
"FROM (\n", | ||
"\tSELECT \n", | ||
"\t\tpr.cid_hash,\n", | ||
"\t\tpr.ping_round,\n", | ||
"\t\tpr.is_active, \n", | ||
"\t\tpr.has_records,\n", | ||
"\t\tpeer_info.client\n", | ||
"\tFROM ping_results as pr\n", | ||
"\tLEFT JOIN peer_info ON pr.peer_id = peer_info.peer_id\n", | ||
"\tORDER BY ping_round ASC\n", | ||
") as ping\n", | ||
"GROUP BY cid_hash, ping_round, is_active;\n", | ||
"\"\"\"\n", | ||
"\n", | ||
"ping_rounds = pd.read_sql_query(sql_query, engine)\n", | ||
"print(\"\")\n", | ||
"plot_ping_dist(ping_rounds, \"is_active\", {\"ylabel\":\"Peers Online\"})" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"## Get the active Non-Hydras PR Holders distribution per ping_round\n", | ||
"\n", | ||
"sql_query = \"\"\"\n", | ||
"\tSELECT \n", | ||
"\t\tping.cid_hash,\n", | ||
"\t\tping.ping_round,\n", | ||
"\t\tping.is_active,\n", | ||
"\t\tcount(ping.is_active)\n", | ||
"\tFROM (\n", | ||
"\t\tSELECT \n", | ||
"\t\t\tpr.cid_hash,\n", | ||
"\t\t\tpr.ping_round,\n", | ||
"\t\t\tpr.is_active, \n", | ||
"\t\t\tpr.has_records,\n", | ||
"\t\t\tpeer_info.client\n", | ||
"\t\tFROM ping_results as pr\n", | ||
"\t\tLEFT JOIN peer_info ON pr.peer_id = peer_info.peer_id\n", | ||
"\t\tORDER BY ping_round ASC\n", | ||
"\t) as ping\n", | ||
"\tWHERE ping.client!='hydra-booster'\n", | ||
"\tGROUP BY cid_hash, ping_round, is_active;\n", | ||
"\"\"\"\n", | ||
"\n", | ||
"pings = pd.read_sql_query(sql_query, engine)\n", | ||
"plot_ping_dist(pings, \"is_active\", {\"ylabel\":\"Peers Online\"})" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"## Get the active Hydras PR Holders distribution per ping_round\n", | ||
"\n", | ||
"sql_query = \"\"\"\n", | ||
"\tSELECT \n", | ||
"\t\tping.cid_hash,\n", | ||
"\t\tping.ping_round,\n", | ||
"\t\tping.is_active,\n", | ||
"\t\tcount(ping.is_active)\n", | ||
"\tFROM (\n", | ||
"\t\tSELECT \n", | ||
"\t\t\tpr.cid_hash,\n", | ||
"\t\t\tpr.ping_round,\n", | ||
"\t\t\tpr.is_active, \n", | ||
"\t\t\tpr.has_records,\n", | ||
"\t\t\tpeer_info.client\n", | ||
"\t\tFROM ping_results as pr\n", | ||
"\t\tLEFT JOIN peer_info ON pr.peer_id = peer_info.peer_id\n", | ||
"\t\tORDER BY ping_round ASC\n", | ||
"\t) as ping\n", | ||
"\tWHERE ping.client='hydra-booster'\n", | ||
"\tGROUP BY cid_hash, ping_round, is_active;\n", | ||
"\"\"\"\n", | ||
"\n", | ||
"pings = pd.read_sql_query(sql_query, engine)\n", | ||
"plot_ping_dist(pings, \"is_active\", {\"ylabel\": \"Peers Online\"})" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"### Track whether the PR Holders share the PRs\n", | ||
"We divide them into:\n", | ||
"1. Total PR Holders sharing the PRs\n", | ||
"2. Only non-hydra PR Holders sharing the PRs\n", | ||
"3. Only hydra PR Holders sharing the PRs" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"## Get the distribution of the PR Holders that share the PRs per ping_round\n", | ||
"\n", | ||
"sql_query = \"\"\"\n", | ||
"SELECT \n", | ||
"\tping.cid_hash,\n", | ||
"\tping.ping_round,\n", | ||
"\tping.has_records,\n", | ||
"\tcount(ping.has_records)\n", | ||
"FROM (\n", | ||
"\tSELECT \n", | ||
"\t\tpr.cid_hash,\n", | ||
"\t\tpr.ping_round,\n", | ||
"\t\tpr.is_active, \n", | ||
"\t\tpr.has_records,\n", | ||
"\t\tpeer_info.client\n", | ||
"\tFROM ping_results as pr\n", | ||
"\tLEFT JOIN peer_info ON pr.peer_id = peer_info.peer_id\n", | ||
"\tORDER BY ping_round ASC\n", | ||
") as ping\n", | ||
"GROUP BY cid_hash, ping_round, has_records;\n", | ||
"\"\"\"\n", | ||
"\n", | ||
"pings = pd.read_sql_query(sql_query, engine)\n", | ||
"plot_ping_dist(pings, \"has_records\", {\"ylabel\": \"Peers Sharing PRs\"})" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"## Get the non-hydra PR Holders sharing the PRs per ping_round\n", | ||
"\n", | ||
"sql_query = \"\"\"\n", | ||
"SELECT \n", | ||
"\tping.cid_hash,\n", | ||
"\tping.ping_round,\n", | ||
"\tping.has_records,\n", | ||
"\tcount(ping.has_records)\n", | ||
"FROM (\n", | ||
"\tSELECT \n", | ||
"\t\tpr.cid_hash,\n", | ||
"\t\tpr.ping_round,\n", | ||
"\t\tpr.is_active, \n", | ||
"\t\tpr.has_records,\n", | ||
"\t\tpeer_info.client\n", | ||
"\tFROM ping_results as pr\n", | ||
"\tLEFT JOIN peer_info ON pr.peer_id = peer_info.peer_id\n", | ||
"\tORDER BY ping_round ASC\n", | ||
") as ping\n", | ||
"WHERE ping.client!='hydra-booster'\n", | ||
"GROUP BY cid_hash, ping_round, has_records;\n", | ||
"\"\"\"\n", | ||
"\n", | ||
"pings = pd.read_sql_query(sql_query, engine)\n", | ||
"plot_ping_dist(pings, \"has_records\", {\"ylabel\": \"Peers Sharing PRs\"})" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"## Get the distribution of hydra peers sharing the PRs per ping_round\n", | ||
"\n", | ||
"sql_query = \"\"\"\n", | ||
"SELECT \n", | ||
"\tping.cid_hash,\n", | ||
"\tping.ping_round,\n", | ||
"\tping.has_records,\n", | ||
"\tcount(ping.has_records)\n", | ||
"FROM (\n", | ||
"\tSELECT \n", | ||
"\t\tpr.cid_hash,\n", | ||
"\t\tpr.ping_round,\n", | ||
"\t\tpr.is_active, \n", | ||
"\t\tpr.has_records,\n", | ||
"\t\tpeer_info.client\n", | ||
"\tFROM ping_results as pr\n", | ||
"\tLEFT JOIN peer_info ON pr.peer_id = peer_info.peer_id\n", | ||
"\tORDER BY ping_round ASC\n", | ||
") as ping\n", | ||
"WHERE ping.client='hydra-booster'\n", | ||
"GROUP BY cid_hash, ping_round, has_records;\n", | ||
"\"\"\"\n", | ||
"\n", | ||
"pings = pd.read_sql_query(sql_query, engine)\n", | ||
"plot_ping_dist(pings, \"has_records\", {\"ylabel\": \"Peers Sharing PRs\"})" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"engine.dispose()" | ||
] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Python 3.8.10 ('plotter')", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.8.10" | ||
}, | ||
"orig_nbformat": 4, | ||
"vscode": { | ||
"interpreter": { | ||
"hash": "27c6d93b683c7a1975bfd893e997da1d087883bf6b96d34d1e63ecc137ac54d0" | ||
} | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 2 | ||
} |
Oops, something went wrong.