# result.py

import json
import numpy as np
import os
import argparse


def f1(p, r):
    """Return the F1 score (harmonic mean) of precision ``p`` and recall ``r``.

    A recall of zero short-circuits to ``0.`` so the division is never
    attempted in that case.
    """
    if r != 0.:
        numerator = 2 * p * r
        denominator = float(p + r)
        return numerator / denominator
    return 0.

def merge_dict(dict1, dict2):
    """Return a new dict holding the entries of both inputs.

    Neither argument is modified. When a key appears in both, the value
    from ``dict2`` wins.
    """
    return {**dict1, **dict2}

def macro(dataset, threshold, if_generate=False):
    """Compute loose-macro precision and recall at a confidence threshold.

    For every example, the predicted labels are the keys of its
    ``'confidence_ranking'`` mapping whose confidence is ``>= threshold``.
    Precision is averaged only over examples with at least one predicted
    label; recall is averaged only over examples with at least one gold
    label (``'annotation'``).

    Args:
        dataset: iterable of dicts with the keys ``'annotation'`` and
            ``'confidence_ranking'``; when ``if_generate`` is true the keys
            ``'id'``, ``'premise'`` and ``'entity'`` are read as well.
        threshold: minimum confidence for a label to count as predicted.
        if_generate: when true, also build one output record per example.

    Returns:
        A ``(precision, recall, res)`` tuple. ``res`` is the list of
        generated records, or an empty list when ``if_generate`` is false.
        Precision / recall are ``0.`` when no example contributes to them.
    """
    precision_sum = 0.
    pred_example_count = 0
    recall_sum = 0.
    gold_example_count = 0
    res = []
    for raw_dat in dataset:
        gold_labels = raw_dat['annotation']
        confidence_ranking = raw_dat['confidence_ranking']
        predicted_labels = [label for label, confidence in confidence_ranking.items()
                            if confidence >= threshold]

        if if_generate:
            # Copy the entity from the example itself; a literal ['entity']
            # placeholder would discard the mention in every saved record.
            res.append({'id': raw_dat['id'],
                        'premise': raw_dat['premise'],
                        'entity': raw_dat['entity'],
                        'annotation': raw_dat['annotation'],
                        'predicted_labels': list(predicted_labels)})

        # The overlap is shared by the precision and recall terms.
        overlap = len(set(predicted_labels) & set(gold_labels))

        if predicted_labels:
            precision_sum += overlap / float(len(predicted_labels))
            pred_example_count += 1
        if gold_labels:
            recall_sum += overlap / float(len(gold_labels))
            gold_example_count += 1

    precision = precision_sum / pred_example_count if pred_example_count > 0 else 0.
    recall = recall_sum / gold_example_count if gold_example_count > 0 else 0.

    return precision, recall, res


def _read_jsonl(path):
    """Parse one JSON-lines file into a list of objects, skipping blank lines."""
    # Iterating the file splits on real newlines only; str.splitlines() would
    # also split on characters such as U+2028 that may occur inside a JSON string.
    with open(path, encoding='utf-8') as fin:
        return [json.loads(line) for line in fin if line.strip()]


def load_res(res_path):
    """Load result records from a JSON-lines file or a directory of such files.

    Args:
        res_path: path to a single file, or to a directory whose regular
            files are all read (in sorted name order, so the merged result
            is deterministic). Sub-directories are skipped.

    Returns:
        A list with one parsed JSON object per non-blank line.

    Raises:
        ValueError: if ``res_path`` is neither an existing file nor an
            existing directory.
    """
    if os.path.isdir(res_path):
        res = []
        for name in sorted(os.listdir(res_path)):
            path = os.path.join(res_path, name)
            if os.path.isfile(path):
                res.extend(_read_jsonl(path))
        return res
    if os.path.isfile(res_path):
        return _read_jsonl(res_path)
    raise ValueError(f"res_path error! {res_path!r} is neither a file nor a directory")


def _score_summary(precision, recall):
    """Format precision, recall and F1 as tab-separated percentages (one decimal)."""
    # Scale before rounding: round(x, 3) * 100 yields artifacts such as
    # 56.699999999999996 in the printed table.
    scores = (precision, recall, f1(precision, recall))
    return '\t'.join(str(round(score * 100, 1)) for score in scores)


def main():
    """Pick the best threshold on DEV, evaluate TEST with it, and save predictions.

    Sweeps thresholds ``threshold_start, threshold_start + step, ...`` up to
    1.0 on the DEV results, keeps the threshold with the highest loose-macro
    F1, reports the TEST scores at that threshold and writes the per-example
    TEST predictions to ``<model_dir>/result.json`` (one JSON object per line).
    """
    parser = argparse.ArgumentParser()

    parser.add_argument('--dev',
                        type=str,
                        default='',
                        help='path to the DEV result file(s) generated by eval.py')
    parser.add_argument('--test',
                        type=str,
                        default='',
                        help='path to the TEST result file(s) generated by eval.py')
    parser.add_argument('--model_dir',
                        type=str,
                        default='',
                        help='dir path to model checkpoint. Used to save typing result')
    parser.add_argument('--threshold_start',
                        type=float,
                        default=0.0,
                        help='Will loop through [threshold_start, 1.0] on dev set to select '
                             'the best threshold to eval on test set')
    parser.add_argument('--threshold_step',
                        type=float,
                        default=0.005,
                        help='threshold increment every time')

    args = parser.parse_args()
    if args.threshold_step <= 0:
        parser.error('--threshold_step must be positive')

    dev_dat = load_res(args.dev)
    test_dat = load_res(args.test)

    # Loose-macro follow ultra-fine grained entity typing
    print('Eval DEV on Loose Macro Score:')
    f1_champ = 0.0
    threshold_champ = 1.0
    # Derive each threshold from an integer step index instead of np.arange:
    # with a float step, arange may overshoot the end point and accumulates
    # representation noise. The small epsilon keeps 1.0 itself in the sweep.
    n_steps = int(np.floor((1.0 - args.threshold_start) / args.threshold_step + 1e-9))
    for step_idx in range(n_steps + 1):
        # Rounding to 10 decimals only strips float noise (0.015000000000000001 -> 0.015).
        threshold = min(round(args.threshold_start + step_idx * args.threshold_step, 10), 1.0)
        precision, recall, _ = macro(dev_dat, threshold, False)
        print(f'Threshold = {threshold}\t{_score_summary(precision, recall)}')

        score = f1(precision, recall)
        # Strict comparison: on ties the lowest threshold is kept.
        if score > f1_champ:
            f1_champ = score
            threshold_champ = threshold

    print(f'{"*"*10}\n F1 champ on DEV = {round(f1_champ * 100, 1)} when threshold = {threshold_champ}\n{"*"*10}')

    print("Eval TEST on Loose Macro Score:")
    precision, recall, res = macro(test_dat, threshold_champ, True)
    print(_score_summary(precision, recall))

    # save res file
    if args.model_dir:
        # Create the target directory up front so the run does not fail at
        # the very last step after the whole evaluation has finished.
        os.makedirs(args.model_dir, exist_ok=True)
    with open(os.path.join(args.model_dir, 'result.json'), 'w', encoding='utf-8') as fout:
        fout.write("\n".join(json.dumps(items) for items in res))


# Run the evaluation only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()