Skip to content

Commit

Permalink
Merge pull request #14 from cortze/dev
Browse files Browse the repository at this point in the history
Merge CLI-refactoring
  • Loading branch information
cortze authored Jan 12, 2023
2 parents 527d4aa + 977c3d1 commit 50abb81
Show file tree
Hide file tree
Showing 64 changed files with 11,691 additions and 5,854 deletions.
9 changes: 6 additions & 3 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,16 @@ build/
data/*

# Dismiss python cached stuff and venv
__pycache__/
.ipynb_checkpoints/
analyzer/.ipynb_checkpoints/
analyzer/venv/
logs/

# Igonore the .vscode configuration
# Ignore the .vscode configuration
.vscode

#Ignore .idea
.idea

# TODO: update to make file and build folder
dbs
dbs
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ install:

dependencies:
$(GIT_SUBM) update --init
cd go-libp2p-kad-dht && git checkout cid-hoarder
cd go-libp2p-kad-dht && git checkout origin/cid-hoarder


clean:
Expand Down
174 changes: 138 additions & 36 deletions README.md

Large diffs are not rendered by default.

187 changes: 0 additions & 187 deletions analyzer/CID_distribution.ipynb

This file was deleted.

202 changes: 202 additions & 0 deletions analyzer/cid_distribution_in_hash_space.ipynb

Large diffs are not rendered by default.

1,139 changes: 0 additions & 1,139 deletions analyzer/cid_hoarder_analyzer.ipynb

This file was deleted.

349 changes: 349 additions & 0 deletions analyzer/cid_pinging_phase.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,349 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Analysis of the CID pinging phase\n",
"\n",
" "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"## Import dependencies\n",
"import sqlalchemy as sa\n",
"import pandas as pd\n",
"import seaborn as sns\n",
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"\n",
"## DB Credentials\n",
"HOST=\"localhost\"\n",
"PORT=\"5432\"\n",
"DB=\"hoarder_test\"\n",
"USER=\"hoarder\"\n",
"PASSWD=\"password\"\n",
"\n",
"# Connecte with the DB\n",
"engine = sa.create_engine(f'postgresql://{USER}:{PASSWD}@{HOST}:{PORT}/{DB}')\n",
"\n",
"## plotting style\n",
"fig_size= (7,4)\n",
"sns.set_context(\"talk\", font_scale=1)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"## get the median time of each fetch time\n",
"\n",
"sql_query=\"\"\"\n",
" SELECT \n",
" cid_hash,\n",
" ping_round, \n",
" fetch_time\n",
" FROM fetch_results\n",
" ORDER BY ping_round;\n",
"\"\"\"\n",
"ping_rounds = pd.read_sql_query(sql_query, engine)\n",
"\n",
"avg_fetcht = ping_rounds.groupby(\"ping_round\").mean()\n",
"hours_dist = avg_fetcht[\"fetch_time\"].to_numpy()\n",
"\n",
"hours_dist = (hours_dist - hours_dist[0]) / 3600\n",
"print(hours_dist)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Track the Activity or Onliness of those PR Holders\n",
"We divide them into:\n",
"1. Total PR Holders\n",
"2. Only non-hydra PR Holders\n",
"3. Only hydra PR Holders"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def plot_ping_dist(pd_obj, column_name, opts):\n",
" ## Get the total active peers distribution per ping_round\n",
" pv_table = pd_obj.pivot(index=[\"ping_round\", \"cid_hash\"], columns=column_name, values=\"count\")\n",
" pv_table = pv_table.fillna(0)\n",
" aux = pd.DataFrame(pv_table.to_records())\n",
"\n",
" # make dist\n",
" dist = []\n",
" for i, h in enumerate(hours_dist):\n",
" t = aux.query(f\"ping_round == {i}\")\n",
" dist.append(t[\"True\"])\n",
" \n",
" ## Make a boxplot with the distribution\n",
" fig, ax = plt.subplots(figsize=(12,6))\n",
" ax.boxplot(dist, positions=hours_dist, showfliers=True) \n",
" ticks = np.linspace(0, 36, 10) ###### <---- *UPDATE THIS* to fit the study duration\n",
" plt.xticks(ticks, ticks.astype(int))\n",
" plt.xlabel(\"Time Since Publication (Hours)\")\n",
" plt.ylabel(opts[\"ylabel\"])\n",
" plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"## Get the active peers distribution per ping_round\n",
"\n",
"sql_query = \"\"\"\n",
"SELECT \n",
"\tping.cid_hash,\n",
"\tping.ping_round,\n",
"\tping.is_active,\n",
"\tcount(ping.is_active)\n",
"FROM (\n",
"\tSELECT \n",
"\t\tpr.cid_hash,\n",
"\t\tpr.ping_round,\n",
"\t\tpr.is_active, \n",
"\t\tpr.has_records,\n",
"\t\tpeer_info.client\n",
"\tFROM ping_results as pr\n",
"\tLEFT JOIN peer_info ON pr.peer_id = peer_info.peer_id\n",
"\tORDER BY ping_round ASC\n",
") as ping\n",
"GROUP BY cid_hash, ping_round, is_active;\n",
"\"\"\"\n",
"\n",
"ping_rounds = pd.read_sql_query(sql_query, engine)\n",
"print(\"\")\n",
"plot_ping_dist(ping_rounds, \"is_active\", {\"ylabel\":\"Peers Online\"})"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"## Get the active Non-Hydras PR Holders distribution per ping_round\n",
"\n",
"sql_query = \"\"\"\n",
"\tSELECT \n",
"\t\tping.cid_hash,\n",
"\t\tping.ping_round,\n",
"\t\tping.is_active,\n",
"\t\tcount(ping.is_active)\n",
"\tFROM (\n",
"\t\tSELECT \n",
"\t\t\tpr.cid_hash,\n",
"\t\t\tpr.ping_round,\n",
"\t\t\tpr.is_active, \n",
"\t\t\tpr.has_records,\n",
"\t\t\tpeer_info.client\n",
"\t\tFROM ping_results as pr\n",
"\t\tLEFT JOIN peer_info ON pr.peer_id = peer_info.peer_id\n",
"\t\tORDER BY ping_round ASC\n",
"\t) as ping\n",
"\tWHERE ping.client!='hydra-booster'\n",
"\tGROUP BY cid_hash, ping_round, is_active;\n",
"\"\"\"\n",
"\n",
"pings = pd.read_sql_query(sql_query, engine)\n",
"plot_ping_dist(pings, \"is_active\", {\"ylabel\":\"Peers Online\"})"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"## Get the active Hydras PR Holders distribution per ping_round\n",
"\n",
"sql_query = \"\"\"\n",
"\tSELECT \n",
"\t\tping.cid_hash,\n",
"\t\tping.ping_round,\n",
"\t\tping.is_active,\n",
"\t\tcount(ping.is_active)\n",
"\tFROM (\n",
"\t\tSELECT \n",
"\t\t\tpr.cid_hash,\n",
"\t\t\tpr.ping_round,\n",
"\t\t\tpr.is_active, \n",
"\t\t\tpr.has_records,\n",
"\t\t\tpeer_info.client\n",
"\t\tFROM ping_results as pr\n",
"\t\tLEFT JOIN peer_info ON pr.peer_id = peer_info.peer_id\n",
"\t\tORDER BY ping_round ASC\n",
"\t) as ping\n",
"\tWHERE ping.client='hydra-booster'\n",
"\tGROUP BY cid_hash, ping_round, is_active;\n",
"\"\"\"\n",
"\n",
"pings = pd.read_sql_query(sql_query, engine)\n",
"plot_ping_dist(pings, \"is_active\", {\"ylabel\": \"Peers Online\"})"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Track the whether the PR Holders share the PRs\n",
"We divide them into:\n",
"1. Total PR Holders sharing the PRs\n",
"2. Only non-hydra PR Holders sharing the PRs\n",
"3. Only hydra PR Holders sharing the PRs"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"## Get the distribution of the PR Holders that share the PRs per ping_round\n",
"\n",
"sql_query = \"\"\"\n",
"SELECT \n",
"\tping.cid_hash,\n",
"\tping.ping_round,\n",
"\tping.has_records,\n",
"\tcount(ping.has_records)\n",
"FROM (\n",
"\tSELECT \n",
"\t\tpr.cid_hash,\n",
"\t\tpr.ping_round,\n",
"\t\tpr.is_active, \n",
"\t\tpr.has_records,\n",
"\t\tpeer_info.client\n",
"\tFROM ping_results as pr\n",
"\tLEFT JOIN peer_info ON pr.peer_id = peer_info.peer_id\n",
"\tORDER BY ping_round ASC\n",
") as ping\n",
"GROUP BY cid_hash, ping_round, has_records;\n",
"\"\"\"\n",
"\n",
"pings = pd.read_sql_query(sql_query, engine)\n",
"plot_ping_dist(pings, \"has_records\", {\"ylabel\": \"Peers Sharing PRs\"})"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"## Get the non-hydra PR Holders sharing the PRs per ping_round\n",
"\n",
"sql_query = \"\"\"\n",
"SELECT \n",
"\tping.cid_hash,\n",
"\tping.ping_round,\n",
"\tping.has_records,\n",
"\tcount(ping.has_records)\n",
"FROM (\n",
"\tSELECT \n",
"\t\tpr.cid_hash,\n",
"\t\tpr.ping_round,\n",
"\t\tpr.is_active, \n",
"\t\tpr.has_records,\n",
"\t\tpeer_info.client\n",
"\tFROM ping_results as pr\n",
"\tLEFT JOIN peer_info ON pr.peer_id = peer_info.peer_id\n",
"\tORDER BY ping_round ASC\n",
") as ping\n",
"WHERE ping.client!='hydra-booster'\n",
"GROUP BY cid_hash, ping_round, has_records;\n",
"\"\"\"\n",
"\n",
"pings = pd.read_sql_query(sql_query, engine)\n",
"plot_ping_dist(pings, \"has_records\", {\"ylabel\": \"Peers Sharing PRs\"})"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"## Get the distribution of hydra peers sharing the PRs per ping_round\n",
"\n",
"sql_query = \"\"\"\n",
"SELECT \n",
"\tping.cid_hash,\n",
"\tping.ping_round,\n",
"\tping.has_records,\n",
"\tcount(ping.has_records)\n",
"FROM (\n",
"\tSELECT \n",
"\t\tpr.cid_hash,\n",
"\t\tpr.ping_round,\n",
"\t\tpr.is_active, \n",
"\t\tpr.has_records,\n",
"\t\tpeer_info.client\n",
"\tFROM ping_results as pr\n",
"\tLEFT JOIN peer_info ON pr.peer_id = peer_info.peer_id\n",
"\tORDER BY ping_round ASC\n",
") as ping\n",
"WHERE ping.client='hydra-booster'\n",
"GROUP BY cid_hash, ping_round, has_records;\n",
"\"\"\"\n",
"\n",
"pings = pd.read_sql_query(sql_query, engine)\n",
"plot_ping_dist(pings, \"has_records\", {\"ylabel\": \"Peers Sharing PRs\"})"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"engine.dispose()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3.8.10 ('plotter')",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.10"
},
"orig_nbformat": 4,
"vscode": {
"interpreter": {
"hash": "27c6d93b683c7a1975bfd893e997da1d087883bf6b96d34d1e63ecc137ac54d0"
}
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Loading

0 comments on commit 50abb81

Please sign in to comment.