RAR-b results (#7)
* rarb results

* rarb results model-level diff
gowitheflow-1998 authored Jul 11, 2024
1 parent d5851c8 commit 9b6b7ba
Showing 378 changed files with 45,333 additions and 0 deletions.
@@ -0,0 +1,42 @@
{
"dataset_revision": "c481e0da3dcbbad8bce7721dea9085b74320a0a3",
"evaluation_time": 0.0,
"kg_co2_emissions": null,
"mteb_version": "1.12.27",
"scores": {
"test": [
{
"hf_subset": "default",
"languages": [
"eng-Latn"
],
"main_score": 0.10099,
"ndcg_at_1": 0.03413,
"ndcg_at_3": 0.07297,
"ndcg_at_5": 0.08373,
"ndcg_at_10": 0.10099,
"ndcg_at_100": 0.1587,
"ndcg_at_1000": 0.20125,
"map_at_1": 0.03413,
"map_at_3": 0.06314,
"map_at_5": 0.06903,
"map_at_10": 0.07608,
"map_at_100": 0.08626,
"map_at_1000": 0.08762,
"recall_at_1": 0.03413,
"recall_at_3": 0.10154,
"recall_at_5": 0.12799,
"recall_at_10": 0.18174,
"recall_at_100": 0.47014,
"recall_at_1000": 0.81911,
"precision_at_1": 0.03413,
"precision_at_3": 0.03385,
"precision_at_5": 0.0256,
"precision_at_10": 0.01817,
"precision_at_100": 0.0047,
"precision_at_1000": 0.00082
}
]
},
"task_name": "ARCChallenge"
}
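Every result file added in this commit follows the same MTEB output schema: one JSON object per task, holding the dataset revision, the mteb version, and a list of per-split score entries keyed by metric. A minimal sketch of reading one of these files with Python's standard library (the filename ARCChallenge.json is assumed here for illustration; the commit stores one such file per task):

import json

# Read one task's result file and print its headline metric.
with open("ARCChallenge.json") as f:  # hypothetical filename
    result = json.load(f)

# Each split ("test" here) maps to a list of per-subset score entries.
for split, entries in result["scores"].items():
    for entry in entries:
        print(
            f"{result['task_name']} [{split}, {entry['hf_subset']}]: "
            f"main_score={entry['main_score']:.5f}, "
            f"ndcg@10={entry['ndcg_at_10']:.5f}"
        )

For the retrieval-style RAR-b tasks in this commit, main_score equals ndcg_at_10, as the file above shows.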
@@ -0,0 +1,42 @@
{
"dataset_revision": "303f40ef3d50918d3dc43577d33f2f7344ad72c1",
"evaluation_time": 0.0,
"kg_co2_emissions": null,
"mteb_version": "1.12.27",
"scores": {
"test": [
{
"hf_subset": "default",
"languages": [
"eng-Latn"
],
"main_score": 0.18746,
"ndcg_at_1": 0.11227,
"ndcg_at_3": 0.15844,
"ndcg_at_5": 0.17072,
"ndcg_at_10": 0.18746,
"ndcg_at_100": 0.22967,
"ndcg_at_1000": 0.25717,
"map_at_1": 0.11227,
"map_at_3": 0.1473,
"map_at_5": 0.15406,
"map_at_10": 0.161,
"map_at_100": 0.16853,
"map_at_1000": 0.16943,
"recall_at_1": 0.11227,
"recall_at_3": 0.1906,
"recall_at_5": 0.22063,
"recall_at_10": 0.27219,
"recall_at_100": 0.48238,
"recall_at_1000": 0.70692,
"precision_at_1": 0.11227,
"precision_at_3": 0.06353,
"precision_at_5": 0.04413,
"precision_at_10": 0.02722,
"precision_at_100": 0.00482,
"precision_at_1000": 0.00071
}
]
},
"task_name": "AlphaNLI"
}
@@ -0,0 +1,42 @@
{
"dataset_revision": "a5c990205e017d10761197ccab3000936689c3ae",
"evaluation_time": 0.0,
"kg_co2_emissions": null,
"mteb_version": "1.12.27",
"scores": {
"test": [
{
"hf_subset": "default",
"languages": [
"eng-Latn"
],
"main_score": 0.29021,
"ndcg_at_1": 0.15475,
"ndcg_at_3": 0.23843,
"ndcg_at_5": 0.26644,
"ndcg_at_10": 0.29021,
"ndcg_at_100": 0.33659,
"ndcg_at_1000": 0.36032,
"map_at_1": 0.15475,
"map_at_3": 0.21744,
"map_at_5": 0.233,
"map_at_10": 0.24284,
"map_at_100": 0.25153,
"map_at_1000": 0.25233,
"recall_at_1": 0.15475,
"recall_at_3": 0.29934,
"recall_at_5": 0.36726,
"recall_at_10": 0.44065,
"recall_at_100": 0.6672,
"recall_at_1000": 0.85929,
"precision_at_1": 0.15475,
"precision_at_3": 0.09978,
"precision_at_5": 0.07345,
"precision_at_10": 0.04406,
"precision_at_100": 0.00667,
"precision_at_1000": 0.00086
}
]
},
"task_name": "HellaSwag"
}
@@ -0,0 +1,42 @@
{
"dataset_revision": "bb30be7e9184e6b6b1d99bbfe1bb90a3a81842e6",
"evaluation_time": 0.0,
"kg_co2_emissions": null,
"mteb_version": "1.12.27",
"scores": {
"test": [
{
"hf_subset": "default",
"languages": [
"eng-Latn"
],
"main_score": 0.27891,
"ndcg_at_1": 0.14799,
"ndcg_at_3": 0.23451,
"ndcg_at_5": 0.25608,
"ndcg_at_10": 0.27891,
"ndcg_at_100": 0.32027,
"ndcg_at_1000": 0.34195,
"map_at_1": 0.14799,
"map_at_3": 0.21409,
"map_at_5": 0.22609,
"map_at_10": 0.23538,
"map_at_100": 0.24327,
"map_at_1000": 0.24398,
"recall_at_1": 0.14799,
"recall_at_3": 0.29325,
"recall_at_5": 0.34548,
"recall_at_10": 0.41676,
"recall_at_100": 0.61643,
"recall_at_1000": 0.79271,
"precision_at_1": 0.14799,
"precision_at_3": 0.09775,
"precision_at_5": 0.0691,
"precision_at_10": 0.04168,
"precision_at_100": 0.00616,
"precision_at_1000": 0.00079
}
]
},
"task_name": "PIQA"
}
@@ -0,0 +1,42 @@
{
"dataset_revision": "1851bc536f8bdab29e03e29191c4586b1d8d7c5a",
"evaluation_time": 0.0,
"kg_co2_emissions": null,
"mteb_version": "1.12.27",
"scores": {
"test": [
{
"hf_subset": "default",
"languages": [
"eng-Latn"
],
"main_score": 0.07773,
"ndcg_at_1": 0.03088,
"ndcg_at_3": 0.0509,
"ndcg_at_5": 0.0634,
"ndcg_at_10": 0.07773,
"ndcg_at_100": 0.11673,
"ndcg_at_1000": 0.15158,
"map_at_1": 0.03088,
"map_at_3": 0.04577,
"map_at_5": 0.05276,
"map_at_10": 0.05864,
"map_at_100": 0.06539,
"map_at_1000": 0.06647,
"recall_at_1": 0.03088,
"recall_at_3": 0.06581,
"recall_at_5": 0.09596,
"recall_at_10": 0.14044,
"recall_at_100": 0.3375,
"recall_at_1000": 0.62574,
"precision_at_1": 0.03088,
"precision_at_3": 0.02194,
"precision_at_5": 0.01919,
"precision_at_10": 0.01404,
"precision_at_100": 0.00337,
"precision_at_1000": 0.00063
}
]
},
"task_name": "Quail"
}
@@ -0,0 +1,42 @@
{
"dataset_revision": "25f7d11a7ac12dcbb8d3836eb2de682b98c825e4",
"evaluation_time": 0.0,
"kg_co2_emissions": null,
"mteb_version": "1.12.27",
"scores": {
"test": [
{
"hf_subset": "default",
"languages": [
"eng-Latn"
],
"main_score": 0.56557,
"ndcg_at_1": 0.41173,
"ndcg_at_3": 0.50591,
"ndcg_at_5": 0.53865,
"ndcg_at_10": 0.56557,
"ndcg_at_100": 0.6047,
"ndcg_at_1000": 0.6117,
"map_at_1": 0.41173,
"map_at_3": 0.48259,
"map_at_5": 0.50075,
"map_at_10": 0.51181,
"map_at_100": 0.51956,
"map_at_1000": 0.51982,
"recall_at_1": 0.41173,
"recall_at_3": 0.57345,
"recall_at_5": 0.65296,
"recall_at_10": 0.73652,
"recall_at_100": 0.92318,
"recall_at_1000": 0.97844,
"precision_at_1": 0.41173,
"precision_at_3": 0.19115,
"precision_at_5": 0.13059,
"precision_at_10": 0.07365,
"precision_at_100": 0.00923,
"precision_at_1000": 0.00098
}
]
},
"task_name": "RARbCode"
}
@@ -0,0 +1,42 @@
{
"dataset_revision": "2393603c0221ff52f448d12dd75f0856103c6cca",
"evaluation_time": 0.0,
"kg_co2_emissions": null,
"mteb_version": "1.12.27",
"scores": {
"test": [
{
"hf_subset": "default",
"languages": [
"eng-Latn"
],
"main_score": 0.72054,
"ndcg_at_1": 0.65928,
"ndcg_at_3": 0.69838,
"ndcg_at_5": 0.70861,
"ndcg_at_10": 0.72054,
"ndcg_at_100": 0.74543,
"ndcg_at_1000": 0.75439,
"map_at_1": 0.65928,
"map_at_3": 0.6889,
"map_at_5": 0.69457,
"map_at_10": 0.69954,
"map_at_100": 0.7043,
"map_at_1000": 0.70463,
"recall_at_1": 0.65928,
"recall_at_3": 0.72575,
"recall_at_5": 0.75059,
"recall_at_10": 0.78731,
"recall_at_100": 0.90758,
"recall_at_1000": 0.97832,
"precision_at_1": 0.65928,
"precision_at_3": 0.24192,
"precision_at_5": 0.15012,
"precision_at_10": 0.07873,
"precision_at_100": 0.00908,
"precision_at_1000": 0.00098
}
]
},
"task_name": "RARbMath"
}
@@ -0,0 +1,42 @@
{
"dataset_revision": "4ed8415e9dc24060deefc84be59e2db0aacbadcc",
"evaluation_time": 0.0,
"kg_co2_emissions": null,
"mteb_version": "1.12.27",
"scores": {
"test": [
{
"hf_subset": "default",
"languages": [
"eng-Latn"
],
"main_score": 0.05028,
"ndcg_at_1": 0.02456,
"ndcg_at_3": 0.03796,
"ndcg_at_5": 0.04441,
"ndcg_at_10": 0.05028,
"ndcg_at_100": 0.07972,
"ndcg_at_1000": 0.11983,
"map_at_1": 0.02456,
"map_at_3": 0.03463,
"map_at_5": 0.03816,
"map_at_10": 0.04053,
"map_at_100": 0.04526,
"map_at_1000": 0.04646,
"recall_at_1": 0.02456,
"recall_at_3": 0.04759,
"recall_at_5": 0.06346,
"recall_at_10": 0.08188,
"recall_at_100": 0.2349,
"recall_at_1000": 0.56909,
"precision_at_1": 0.02456,
"precision_at_3": 0.01586,
"precision_at_5": 0.01269,
"precision_at_10": 0.00819,
"precision_at_100": 0.00235,
"precision_at_1000": 0.00057
}
]
},
"task_name": "SIQA"
}
@@ -0,0 +1,42 @@
{
"dataset_revision": "9ab3ca3ccdd0d43f9cd6d346a363935d127f4f45",
"evaluation_time": 0.0,
"kg_co2_emissions": null,
"mteb_version": "1.12.27",
"scores": {
"test": [
{
"hf_subset": "default",
"languages": [
"eng-Latn"
],
"main_score": 0.03332,
"ndcg_at_1": 0.0039,
"ndcg_at_3": 0.0105,
"ndcg_at_5": 0.01986,
"ndcg_at_10": 0.03332,
"ndcg_at_100": 0.09939,
"ndcg_at_1000": 0.15276,
"map_at_1": 0.00185,
"map_at_3": 0.00713,
"map_at_5": 0.01202,
"map_at_10": 0.01749,
"map_at_100": 0.02849,
"map_at_1000": 0.03015,
"recall_at_1": 0.00185,
"recall_at_3": 0.01512,
"recall_at_5": 0.03561,
"recall_at_10": 0.07355,
"recall_at_100": 0.36765,
"recall_at_1000": 0.74968,
"precision_at_1": 0.0039,
"precision_at_3": 0.00863,
"precision_at_5": 0.01169,
"precision_at_10": 0.01149,
"precision_at_100": 0.00607,
"precision_at_1000": 0.00129
}
]
},
"task_name": "SpartQA"
}
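Since every file above shares the same shape (a single "default" eng-Latn entry under the "test" split), the per-task main_score values can be collected with a short loop. A sketch, assuming the JSON files sit together in a hypothetical results/ directory:

import json
from pathlib import Path

# Collect main_score from every task result file and report a simple average.
results_dir = Path("results")  # hypothetical location of the JSON files
scores = {}
for path in sorted(results_dir.glob("*.json")):
    data = json.loads(path.read_text())
    entry = data["scores"]["test"][0]  # each file has a single default subset
    scores[data["task_name"]] = entry["main_score"]

# Print tasks from strongest to weakest, then the unweighted mean.
for task, score in sorted(scores.items(), key=lambda kv: kv[1], reverse=True):
    print(f"{task:12s} {score:.5f}")
print(f"{'average':12s} {sum(scores.values()) / len(scores):.5f}")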