diff --git a/CHANGELOG.md b/CHANGELOG.md index acc27df2..92f2ff98 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ## [Unreleased] ### Added - Hamming distance +- Braun-Blanquet similarity ### Changed - `classes` parameter added to `matrix_params_from_table` function - Matrices with `numpy.integer` elements are now accepted diff --git a/Document/Document.ipynb b/Document/Document.ipynb index 2809e203..3d546f90 100644 --- a/Document/Document.ipynb +++ b/Document/Document.ipynb @@ -125,6 +125,7 @@ "
  • Adjusted G-Mean
  • \n", "
  • Adjusted F-Score
  • \n", "
  • Overlap Coefficient
  • \n", + "
  • Braun-Blanquet Similarity
  • \n", "
  • Otsuka Ochiai Coefficient
  • \n", "
  • Tversky Index
  • \n", "
  • Area Under The PR Curve
  • \n", @@ -6152,6 +6153,45 @@ "" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### BB (Braun-Blanquet similarity)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The Braun-Blanquet coefficient is a similarity measure that is mostly used in botany. It is defined as the size of the intersection divided by the larger of the size of the two sets [[82]](#ref82) [[83]](#ref83)." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "$$BB=\\frac{TP}{max(TOP,P)}=min(PPV,TPR)$$" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "cm.BB" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -14029,7 +14069,11 @@ "\n", "
    80- R. W. Hamming, \"Error detecting and error correcting codes,\" The Bell system technical journal, vol. 29, no. 2, pp. 147-160, 1950.
    \n", "\n", - "
    81- S. S. Choi, S. H. Cha, and C. C. Tappert, \"A survey of binary similarity and distance measures,\" Journal of systemics, cybernetics and informatics, vol. 8, no. 1, pp. 43-48, 2010.
    \n" + "
    81- S. S. Choi, S. H. Cha, and C. C. Tappert, \"A survey of binary similarity and distance measures,\" Journal of systemics, cybernetics and informatics, vol. 8, no. 1, pp. 43-48, 2010.
    \n", + "\n", + "
    82- J. Braun-Blanquet, \"Plant sociology. The study of plant communities,\" 1st ed., 1932.
    \n", + "\n", + "
    83- C. C. Little, \"Abydos Documentation,\" 2020.
    \n" ] } ], diff --git a/README.md b/README.md index b85ba3a9..fb2a364e 100644 --- a/README.md +++ b/README.md @@ -932,6 +932,10 @@ PyCM can be used online in interactive Jupyter Notebooks via the Binder or Colab
    81- S. S. Choi, S. H. Cha, and C. C. Tappert, "A survey of binary similarity and distance measures," Journal of systemics, cybernetics and informatics, vol. 8, no. 1, pp. 43-48, 2010.
    +
    82- J. Braun-Blanquet, "Plant sociology. The study of plant communities," 1st ed., 1932.
    + +
    83- C. C. Little, "Abydos Documentation," 2020.
    + ## Cite diff --git a/Test/function_test.py b/Test/function_test.py index 718ac588..116e9d4f 100644 --- a/Test/function_test.py +++ b/Test/function_test.py @@ -56,119 +56,120 @@ 10-AUNP 11-AUNU 12-AUPR -13-BCD -14-BM -15-Bangdiwala B -16-Bennett S -17-CBA -18-CEN -19-CSI -20-Chi-Squared -21-Chi-Squared DF -22-Conditional Entropy -23-Cramer V -24-Cross Entropy -25-DOR -26-DP -27-DPI -28-ERR -29-F0.5 -30-F1 -31-F1 Macro -32-F1 Micro -33-F2 -34-FDR -35-FN -36-FNR -37-FNR Macro -38-FNR Micro -39-FOR -40-FP -41-FPR -42-FPR Macro -43-FPR Micro -44-G -45-GI -46-GM -47-Gwet AC1 -48-HD -49-Hamming Loss -50-IBA -51-ICSI -52-IS -53-J -54-Joint Entropy -55-KL Divergence -56-Kappa -57-Kappa 95% CI -58-Kappa No Prevalence -59-Kappa Standard Error -60-Kappa Unbiased -61-Krippendorff Alpha -62-LS -63-Lambda A -64-Lambda B -65-MCC -66-MCCI -67-MCEN -68-MK -69-Mutual Information -70-N -71-NIR -72-NLR -73-NLRI -74-NPV -75-OC -76-OOC -77-OP -78-Overall ACC -79-Overall CEN -80-Overall J -81-Overall MCC -82-Overall MCEN -83-Overall RACC -84-Overall RACCU -85-P -86-P-Value -87-PLR -88-PLRI -89-POP -90-PPV -91-PPV Macro -92-PPV Micro -93-PRE -94-Pearson C -95-Phi-Squared -96-Q -97-QI -98-RACC -99-RACCU -100-RCI -101-RR -102-Reference Entropy -103-Response Entropy -104-SOA1(Landis & Koch) -105-SOA2(Fleiss) -106-SOA3(Altman) -107-SOA4(Cicchetti) -108-SOA5(Cramer) -109-SOA6(Matthews) -110-Scott PI -111-Standard Error -112-TN -113-TNR -114-TNR Macro -115-TNR Micro -116-TON -117-TOP -118-TP -119-TPR -120-TPR Macro -121-TPR Micro -122-Y -123-Zero-one Loss -124-dInd -125-sInd +13-BB +14-BCD +15-BM +16-Bangdiwala B +17-Bennett S +18-CBA +19-CEN +20-CSI +21-Chi-Squared +22-Chi-Squared DF +23-Conditional Entropy +24-Cramer V +25-Cross Entropy +26-DOR +27-DP +28-DPI +29-ERR +30-F0.5 +31-F1 +32-F1 Macro +33-F1 Micro +34-F2 +35-FDR +36-FN +37-FNR +38-FNR Macro +39-FNR Micro +40-FOR +41-FP +42-FPR +43-FPR Macro +44-FPR Micro +45-G +46-GI +47-GM +48-Gwet AC1 +49-HD +50-Hamming Loss +51-IBA +52-ICSI 
+53-IS +54-J +55-Joint Entropy +56-KL Divergence +57-Kappa +58-Kappa 95% CI +59-Kappa No Prevalence +60-Kappa Standard Error +61-Kappa Unbiased +62-Krippendorff Alpha +63-LS +64-Lambda A +65-Lambda B +66-MCC +67-MCCI +68-MCEN +69-MK +70-Mutual Information +71-N +72-NIR +73-NLR +74-NLRI +75-NPV +76-OC +77-OOC +78-OP +79-Overall ACC +80-Overall CEN +81-Overall J +82-Overall MCC +83-Overall MCEN +84-Overall RACC +85-Overall RACCU +86-P +87-P-Value +88-PLR +89-PLRI +90-POP +91-PPV +92-PPV Macro +93-PPV Micro +94-PRE +95-Pearson C +96-Phi-Squared +97-Q +98-QI +99-RACC +100-RACCU +101-RCI +102-RR +103-Reference Entropy +104-Response Entropy +105-SOA1(Landis & Koch) +106-SOA2(Fleiss) +107-SOA3(Altman) +108-SOA4(Cicchetti) +109-SOA5(Cramer) +110-SOA6(Matthews) +111-Scott PI +112-Standard Error +113-TN +114-TNR +115-TNR Macro +116-TNR Micro +117-TON +118-TOP +119-TP +120-TPR +121-TPR Macro +122-TPR Micro +123-Y +124-Zero-one Loss +125-dInd +126-sInd >>> online_help("J") ... >>> online_help("J",alt_link=True) @@ -220,6 +221,8 @@ 'None' >>> RCI_calc(24,0) 'None' +>>> BB_calc(0,0,0) +'None' >>> CEN_calc([1,2,3], {1:{1:0,2:0},2:{1:0,2:0}}, {1:2,2:3}, {1:2,2:3}, 2, modified=False) 'None' >>> convex_combination([1,2,3], {1:{1:0,2:0},2:{1:0,2:0}}, {1:2,2:3}, {1:2,2:3}, 2, modified=False) diff --git a/Test/output_test.py b/Test/output_test.py index 9b14a040..510c5a5e 100644 --- a/Test/output_test.py +++ b/Test/output_test.py @@ -197,6 +197,7 @@ AUC(Area under the ROC curve) None 0.5625 0.63725 0.5 AUCI(AUC value interpretation) None Poor Fair Poor AUPR(Area under the PR curve) None 0.61607 0.41667 None +BB(Braun-Blanquet similarity) 0.0 0.375 0.33333 0.0 BCD(Bray-Curtis dissimilarity) 0.275 0.225 0.025 0.025 BM(Informedness or bookmaker informedness) None 0.125 0.27451 0.0 CEN(Confusion entropy) 0.33496 0.35708 0.53895 0.0 @@ -418,6 +419,7 @@ AUC(Area under the ROC curve) 0.86667 0.61111 0.63889 AUCI(AUC value interpretation) Very Good Fair Fair AUPR(Area under the PR curve) 0.8 
0.33333 0.625 +BB(Braun-Blanquet similarity) 0.6 0.33333 0.5 BCD(Bray-Curtis dissimilarity) 0.09524 0.0 0.09524 BM(Informedness or bookmaker informedness) 0.73333 0.22222 0.27778 CEN(Confusion entropy) 0.25 0.52832 0.56439 diff --git a/Test/overall_test.py b/Test/overall_test.py index 800ea44d..a50c4fd9 100644 --- a/Test/overall_test.py +++ b/Test/overall_test.py @@ -102,6 +102,7 @@ AUC(Area under the ROC curve) 0.88889 0.61111 0.58333 AUCI(AUC value interpretation) Very Good Fair Poor AUPR(Area under the PR curve) 0.8 0.41667 0.55 +BB(Braun-Blanquet similarity) 0.6 0.33333 0.5 BCD(Bray-Curtis dissimilarity) 0.08333 0.04167 0.04167 BM(Informedness or bookmaker informedness) 0.77778 0.22222 0.16667 CEN(Confusion entropy) 0.25 0.49658 0.60442 @@ -254,6 +255,7 @@ AUC(Area under the ROC curve) 0.88889 0.61111 0.58333 AUCI(AUC value interpretation) Very Good Fair Poor AUPR(Area under the PR curve) 0.8 0.41667 0.55 +BB(Braun-Blanquet similarity) 0.6 0.33333 0.5 BCD(Bray-Curtis dissimilarity) 0.08333 0.04167 0.04167 BM(Informedness or bookmaker informedness) 0.77778 0.22222 0.16667 CEN(Confusion entropy) 0.25 0.49658 0.60442 @@ -421,6 +423,7 @@ AUC(Area under the ROC curve) None 0.5625 0.63725 0.5 AUCI(AUC value interpretation) None Poor Fair Poor AUPR(Area under the PR curve) None 0.61607 0.41667 None +BB(Braun-Blanquet similarity) 0.0 0.375 0.33333 0.0 BCD(Bray-Curtis dissimilarity) 0.275 0.225 0.025 0.025 BM(Informedness or bookmaker informedness) None 0.125 0.27451 0.0 CEN(Confusion entropy) 0.33496 0.35708 0.53895 0.0 @@ -554,6 +557,7 @@ AUC(Area under the ROC curve) None 0.5625 0.63725 0.5 AUCI(AUC value interpretation) None Poor Fair Poor AUPR(Area under the PR curve) None 0.61607 0.41667 None +BB(Braun-Blanquet similarity) 0.0 0.375 0.33333 0.0 BCD(Bray-Curtis dissimilarity) 0.275 0.225 0.025 0.025 BM(Informedness or bookmaker informedness) None 0.125 0.27451 0.0 CEN(Confusion entropy) 0.33496 0.35708 0.53895 0.0 @@ -845,6 +849,7 @@ AUC(Area under the ROC curve) 
0.74167 0.66667 0.80952 AUCI(AUC value interpretation) Good Fair Very Good AUPR(Area under the PR curve) 0.72115 0.55556 0.73333 +BB(Braun-Blanquet similarity) 0.69231 0.55556 0.66667 BCD(Bray-Curtis dissimilarity) 0.01852 0.0 0.01852 BM(Informedness or bookmaker informedness) 0.48333 0.33333 0.61905 CEN(Confusion entropy) 0.45994 0.66249 0.47174 @@ -995,6 +1000,7 @@ AUC(Area under the ROC curve) 0.74167 0.66667 0.80952 AUCI(AUC value interpretation) Good Fair Very Good AUPR(Area under the PR curve) 0.72115 0.55556 0.73333 +BB(Braun-Blanquet similarity) 0.69231 0.55556 0.66667 BCD(Bray-Curtis dissimilarity) 0.01852 0.0 0.01852 BM(Informedness or bookmaker informedness) 0.48333 0.33333 0.61905 CEN(Confusion entropy) 0.45994 0.66249 0.47174 @@ -1147,6 +1153,7 @@ AUC(Area under the ROC curve) 0.86667 0.61111 0.63889 AUCI(AUC value interpretation) Very Good Fair Fair AUPR(Area under the PR curve) 0.8 0.33333 0.625 +BB(Braun-Blanquet similarity) 0.6 0.33333 0.5 BCD(Bray-Curtis dissimilarity) 0.09524 0.0 0.09524 BM(Informedness or bookmaker informedness) 0.73333 0.22222 0.27778 CEN(Confusion entropy) 0.25 0.52832 0.56439 diff --git a/Test/verified_test.py b/Test/verified_test.py index 99a93fe1..e4b7a5b5 100644 --- a/Test/verified_test.py +++ b/Test/verified_test.py @@ -392,4 +392,10 @@ 5 >>> cm2.HD[0] 5 +>>> cm1 = ConfusionMatrix(matrix = {1:{1:2,0:2},0:{0:778,1:2}}) # Verified Case -- (https://bit.ly/3BVdNBp) +>>> cm1.BB[1] +0.5 +>>> cm2 = ConfusionMatrix(matrix = {1:{1:2,0:3},0:{0:775,1:4}}) # Verified Case -- (https://bit.ly/3BVdNBp) +>>> cm2.BB[1] +0.3333333333333333 """ diff --git a/Test/warning_test.py b/Test/warning_test.py index 8a6c826a..04f54865 100644 --- a/Test/warning_test.py +++ b/Test/warning_test.py @@ -129,6 +129,7 @@ AUC(Area under the ROC curve) 1.0 0.95833 0.70455 0.75 1.0 0.75 1.0 0.95833 1.0 1.0 AUCI(AUC value interpretation) Excellent Excellent Good Good Excellent Good Excellent Excellent Excellent Excellent AUPR(Area under the PR curve) 1.0 0.75 
0.5 0.75 1.0 0.75 1.0 0.75 1.0 1.0 +BB(Braun-Blanquet similarity) 1.0 0.5 0.5 0.5 1.0 0.5 1.0 0.5 1.0 1.0 BCD(Bray-Curtis dissimilarity) 0.0 0.03846 0.0 0.03846 0.0 0.03846 0.0 0.03846 0.0 0.0 BM(Informedness or bookmaker informedness) 1.0 0.91667 0.40909 0.5 1.0 0.5 1.0 0.91667 1.0 1.0 CEN(Confusion entropy) 0 0.1267 0.23981 0.1267 0 0.1267 0 0.1267 0 0 @@ -289,6 +290,7 @@ AUC(Area under the ROC curve) 1.0 0.95833 0.70455 0.75 1.0 0.75 1.0 0.95833 1.0 1.0 AUCI(AUC value interpretation) Excellent Excellent Good Good Excellent Good Excellent Excellent Excellent Excellent AUPR(Area under the PR curve) 1.0 0.75 0.5 0.75 1.0 0.75 1.0 0.75 1.0 1.0 +BB(Braun-Blanquet similarity) 1.0 0.5 0.5 0.5 1.0 0.5 1.0 0.5 1.0 1.0 BCD(Bray-Curtis dissimilarity) 0.0 0.03846 0.0 0.03846 0.0 0.03846 0.0 0.03846 0.0 0.0 BM(Informedness or bookmaker informedness) 1.0 0.91667 0.40909 0.5 1.0 0.5 1.0 0.91667 1.0 1.0 CEN(Confusion entropy) 0 0.1267 0.23981 0.1267 0 0.1267 0 0.1267 0 0 @@ -469,6 +471,7 @@ AUC(Area under the ROC curve) 0.66667 0.80952 0.74167 None AUCI(AUC value interpretation) Fair Very Good Good None AUPR(Area under the PR curve) 0.55556 0.73333 0.72115 None +BB(Braun-Blanquet similarity) 0.55556 0.66667 0.69231 None BCD(Bray-Curtis dissimilarity) 0.0 0.01852 0.01852 0.0 BM(Informedness or bookmaker informedness) 0.33333 0.61905 0.48333 None CEN(Confusion entropy) 0.51257 0.36499 0.35586 None diff --git a/pycm/pycm_class_func.py b/pycm/pycm_class_func.py index 37c4a495..a1f59e8d 100644 --- a/pycm/pycm_class_func.py +++ b/pycm/pycm_class_func.py @@ -105,6 +105,25 @@ def OC_calc(TP, TOP, P): return "None" +def BB_calc(TP, TOP, P): + """ + Calculate Braun-Blanquet similarity (BB). 
+ + :param TP: true positive + :type TP: int + :param TOP: number of positives in predict vector + :type TOP: int + :param P: number of actual positives + :type P: int + :return: BB as float + """ + try: + BB = TP / max(TOP, P) + return BB + except (ZeroDivisionError, TypeError): + return "None" + + def AGF_calc(TP, FP, FN, TN): """ Calculate Adjusted F-score (AGF). @@ -790,6 +809,7 @@ def class_statistics(TP, TN, FP, FN, classes, table): result["MCCI"][i] = MCC_analysis(result["MCC"][i]) result["AGF"][i] = AGF_calc(TP[i], FP[i], FN[i], TN[i]) result["OC"][i] = OC_calc(TP[i], result["TOP"][i], result["P"][i]) + result["BB"][i] = BB_calc(TP[i], result["TOP"][i], result["P"][i]) result["OOC"][i] = OOC_calc(TP[i], result["TOP"][i], result["P"][i]) result["AUPR"][i] = AUC_calc(result["PPV"][i], result["TPR"][i]) result["ICSI"][i] = MK_BM_calc(result["PPV"][i], result["TPR"][i]) diff --git a/pycm/pycm_handler.py b/pycm/pycm_handler.py index de126862..7736731f 100644 --- a/pycm/pycm_handler.py +++ b/pycm/pycm_handler.py @@ -77,6 +77,7 @@ def __class_stat_init__(cm): cm.AUPR = cm.class_stat["AUPR"] cm.ICSI = cm.class_stat["ICSI"] cm.HD = cm.class_stat["HD"] + cm.BB = cm.class_stat["BB"] def __overall_stat_init__(cm): diff --git a/pycm/pycm_param.py b/pycm/pycm_param.py index 84b04dc1..b781f80a 100644 --- a/pycm/pycm_param.py +++ b/pycm/pycm_param.py @@ -171,7 +171,8 @@ "AUPR", "ICSI", "QI", - "HD"] + "HD", + "BB"] SUMMARY_OVERALL = [ "ACC Macro", @@ -455,7 +456,8 @@ "OOC": "Otsuka-Ochiai coefficient", "AUPR": "Area under the PR curve", "ICSI": "Individual classification success index", - "HD": "Hamming distance"} + "HD": "Hamming distance", + "BB": "Braun-Blanquet similarity"} PARAMS_LINK = { "TPR": "TPR-(True-positive-rate)", @@ -582,9 +584,19 @@ "ARI": "ARI-(Adjusted-Rand-index)", "Bangdiwala B": "Bangdiwala's-B", "Krippendorff Alpha": "Krippendorff's-alpha", - "HD": "HD-(Hamming-distance)"} + "HD": "HD-(Hamming-distance)", + "BB": "BB-(Braun-Blanquet-similarity)"} 
-CAPITALIZE_FILTER = ["BCD", "AUCI", "Q", "AGF", "OOC", "AUPR", "AUC", "QI"] +CAPITALIZE_FILTER = [ + "BCD", + "AUCI", + "Q", + "AGF", + "OOC", + "AUPR", + "AUC", + "QI", + "BB"] BENCHMARK_COLOR = { "PLRI": {