diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 171af9593..9cefb7f28 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -32,5 +32,5 @@ repos: rev: v2.2.4 hooks: - id: codespell - args: ["-L", "newyorker,te,responde,ist,als,oder,technik,sie"] + args: ["-L", "newyorker,te,responde,ist,als,oder,technik,sie,rouge"] exclude: '^(poetry\.lock|trace-viewer/.*|tests/connectors/retrievers/test_document_index_retriever\.py|src/intelligence_layer/use_cases/qa/multiple_chunk_qa.py|src/intelligence_layer/use_cases/summarize/.*|tests/connectors/retrievers/test_document_index_retriever\.py|src/intelligence_layer/use_cases/classify/keyword_extract.py|tests/use_cases/summarize/test_single_chunk_few_shot_summarize.py)$' diff --git a/poetry.lock b/poetry.lock index 9245cb57a..3f89aa02e 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.7.0 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.6.0 and should not be changed by hand. 
[[package]] name = "aiodns" @@ -1519,6 +1519,17 @@ MarkupSafe = ">=2.0" [package.extras] i18n = ["Babel (>=2.7)"] +[[package]] +name = "joblib" +version = "1.3.2" +description = "Lightweight pipelining with Python functions" +optional = false +python-versions = ">=3.7" +files = [ + {file = "joblib-1.3.2-py3-none-any.whl", hash = "sha256:ef4331c65f239985f3f2220ecc87db222f08fd22097a3dd5698f693875f8cbb9"}, + {file = "joblib-1.3.2.tar.gz", hash = "sha256:92f865e621e17784e7955080b6d042489e3b8e294949cc44c6eac304f59772b1"}, +] + [[package]] name = "json5" version = "0.9.14" @@ -1927,16 +1938,6 @@ files = [ {file = "MarkupSafe-2.1.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:5bbe06f8eeafd38e5d0a4894ffec89378b6c6a625ff57e3028921f8ff59318ac"}, {file = "MarkupSafe-2.1.3-cp311-cp311-win32.whl", hash = "sha256:dd15ff04ffd7e05ffcb7fe79f1b98041b8ea30ae9234aed2a9168b5797c3effb"}, {file = "MarkupSafe-2.1.3-cp311-cp311-win_amd64.whl", hash = "sha256:134da1eca9ec0ae528110ccc9e48041e0828d79f24121a1a146161103c76e686"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:f698de3fd0c4e6972b92290a45bd9b1536bffe8c6759c62471efaa8acb4c37bc"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:aa57bd9cf8ae831a362185ee444e15a93ecb2e344c8e52e4d721ea3ab6ef1823"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ffcc3f7c66b5f5b7931a5aa68fc9cecc51e685ef90282f4a82f0f5e9b704ad11"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:47d4f1c5f80fc62fdd7777d0d40a2e9dda0a05883ab11374334f6c4de38adffd"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1f67c7038d560d92149c060157d623c542173016c4babc0c1913cca0564b9939"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = 
"sha256:9aad3c1755095ce347e26488214ef77e0485a3c34a50c5a5e2471dff60b9dd9c"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:14ff806850827afd6b07a5f32bd917fb7f45b046ba40c57abdb636674a8b559c"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8f9293864fe09b8149f0cc42ce56e3f0e54de883a9de90cd427f191c346eb2e1"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-win32.whl", hash = "sha256:715d3562f79d540f251b99ebd6d8baa547118974341db04f5ad06d5ea3eb8007"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-win_amd64.whl", hash = "sha256:1b8dd8c3fd14349433c79fa8abeb573a55fc0fdd769133baac1f5e07abf54aeb"}, {file = "MarkupSafe-2.1.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:8e254ae696c88d98da6555f5ace2279cf7cd5b3f52be2b5cf97feafe883b58d2"}, {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cb0932dc158471523c9637e807d9bfb93e06a95cbf010f1a38b98623b929ef2b"}, {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9402b03f1a1b4dc4c19845e5c749e3ab82d5078d16a2a4c2cd2df62d57bb0707"}, @@ -2276,6 +2277,31 @@ files = [ {file = "nest_asyncio-1.5.8.tar.gz", hash = "sha256:25aa2ca0d2a5b5531956b9e273b45cf664cae2b145101d73b86b199978d48fdb"}, ] +[[package]] +name = "nltk" +version = "3.8.1" +description = "Natural Language Toolkit" +optional = false +python-versions = ">=3.7" +files = [ + {file = "nltk-3.8.1-py3-none-any.whl", hash = "sha256:fd5c9109f976fa86bcadba8f91e47f5e9293bd034474752e92a520f81c93dda5"}, + {file = "nltk-3.8.1.zip", hash = "sha256:1834da3d0682cba4f2cede2f9aad6b0fafb6461ba451db0efb6f9c39798d64d3"}, +] + +[package.dependencies] +click = "*" +joblib = "*" +regex = ">=2021.8.3" +tqdm = "*" + +[package.extras] +all = ["matplotlib", "numpy", "pyparsing", "python-crfsuite", "requests", "scikit-learn", "scipy", "twython"] +corenlp = ["requests"] +machine-learning = ["numpy", "python-crfsuite", "scikit-learn", "scipy"] +plot = 
["matplotlib"] +tgrep = ["pyparsing"] +twitter = ["twython"] + [[package]] name = "nodeenv" version = "1.8.0" @@ -2871,6 +2897,19 @@ files = [ {file = "pycodestyle-2.11.1.tar.gz", hash = "sha256:41ba0e7afc9752dfb53ced5489e89f8186be00e599e712660695b7a75ff2663f"}, ] +[[package]] +name = "pycountry" +version = "22.3.5" +description = "ISO country, subdivision, language, currency and script definitions and their translations" +optional = false +python-versions = ">=3.6, <4" +files = [ + {file = "pycountry-22.3.5.tar.gz", hash = "sha256:b2163a246c585894d808f18783e19137cb70a0c18fb36748dc01fc6f109c1646"}, +] + +[package.dependencies] +setuptools = "*" + [[package]] name = "pycparser" version = "2.21" @@ -3281,7 +3320,6 @@ files = [ {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:69b023b2b4daa7548bcfbd4aa3da05b3a74b772db9e23b982788168117739938"}, {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:81e0b275a9ecc9c0c0c07b4b90ba548307583c125f54d5b6946cfee6360c733d"}, {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba336e390cd8e4d1739f42dfe9bb83a3cc2e80f567d8805e11b46f4a943f5515"}, - {file = "PyYAML-6.0.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:326c013efe8048858a6d312ddd31d56e468118ad4cdeda36c719bf5bb6192290"}, {file = "PyYAML-6.0.1-cp310-cp310-win32.whl", hash = "sha256:bd4af7373a854424dabd882decdc5579653d7868b8fb26dc7d0e99f823aa5924"}, {file = "PyYAML-6.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:fd1592b3fdf65fff2ad0004b5e363300ef59ced41c2e6b3a99d4089fa8c5435d"}, {file = "PyYAML-6.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6965a7bc3cf88e5a1c3bd2e0b5c22f8d677dc88a455344035f03399034eb3007"}, @@ -3289,15 +3327,8 @@ files = [ {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:42f8152b8dbc4fe7d96729ec2b99c7097d656dc1213a3229ca5383f973a5ed6d"}, {file = 
"PyYAML-6.0.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:062582fca9fabdd2c8b54a3ef1c978d786e0f6b3a1510e0ac93ef59e0ddae2bc"}, {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2b04aac4d386b172d5b9692e2d2da8de7bfb6c387fa4f801fbf6fb2e6ba4673"}, - {file = "PyYAML-6.0.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e7d73685e87afe9f3b36c799222440d6cf362062f78be1013661b00c5c6f678b"}, {file = "PyYAML-6.0.1-cp311-cp311-win32.whl", hash = "sha256:1635fd110e8d85d55237ab316b5b011de701ea0f29d07611174a1b42f1444741"}, {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, - {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"}, - {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"}, - {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"}, - {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"}, - {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"}, - {file = "PyYAML-6.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:0d3304d8c0adc42be59c5f8a4d9e3d7379e6955ad754aa9d6ab7a398b59dd1df"}, {file = "PyYAML-6.0.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:50550eb667afee136e9a77d6dc71ae76a44df8b3e51e41b77f6de2932bfe0f47"}, {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1fe35611261b29bd1de0070f0b2f47cb6ff71fa6595c077e42bd0c419fa27b98"}, {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = 
"sha256:704219a11b772aea0d8ecd7058d0082713c3562b4e271b849ad7dc4a5c90c13c"}, @@ -3314,7 +3345,6 @@ files = [ {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a0cd17c15d3bb3fa06978b4e8958dcdc6e0174ccea823003a106c7d4d7899ac5"}, {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:28c119d996beec18c05208a8bd78cbe4007878c6dd15091efb73a30e90539696"}, {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7e07cbde391ba96ab58e532ff4803f79c4129397514e1413a7dc761ccd755735"}, - {file = "PyYAML-6.0.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:49a183be227561de579b4a36efbb21b3eab9651dd81b1858589f796549873dd6"}, {file = "PyYAML-6.0.1-cp38-cp38-win32.whl", hash = "sha256:184c5108a2aca3c5b3d3bf9395d50893a7ab82a38004c8f61c258d4428e80206"}, {file = "PyYAML-6.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:1e2722cc9fbb45d9b87631ac70924c11d3a401b2d7f410cc0e3bbf249f2dca62"}, {file = "PyYAML-6.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9eb6caa9a297fc2c2fb8862bc5370d0303ddba53ba97e71f08023b6cd73d16a8"}, @@ -3322,7 +3352,6 @@ files = [ {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5773183b6446b2c99bb77e77595dd486303b4faab2b086e7b17bc6bef28865f6"}, {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b786eecbdf8499b9ca1d697215862083bd6d2a99965554781d0d8d1ad31e13a0"}, {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc1bf2925a1ecd43da378f4db9e4f799775d6367bdb94671027b73b393a7c42c"}, - {file = "PyYAML-6.0.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:04ac92ad1925b2cff1db0cfebffb6ffc43457495c9b3c39d3fcae417d7125dc5"}, {file = "PyYAML-6.0.1-cp39-cp39-win32.whl", hash = "sha256:faca3bdcf85b2fc05d06ff3fbc1f83e1391b3e724afa3feba7d13eeab355484c"}, {file = "PyYAML-6.0.1-cp39-cp39-win_amd64.whl", hash = 
"sha256:510c9deebc5c0225e8c96813043e62b680ba2f9c50a08d3724c7f28a747d1486"}, {file = "PyYAML-6.0.1.tar.gz", hash = "sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43"}, @@ -3531,6 +3560,103 @@ files = [ attrs = ">=22.2.0" rpds-py = ">=0.7.0" +[[package]] +name = "regex" +version = "2023.10.3" +description = "Alternative regular expression module, to replace re." +optional = false +python-versions = ">=3.7" +files = [ + {file = "regex-2023.10.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:4c34d4f73ea738223a094d8e0ffd6d2c1a1b4c175da34d6b0de3d8d69bee6bcc"}, + {file = "regex-2023.10.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:a8f4e49fc3ce020f65411432183e6775f24e02dff617281094ba6ab079ef0915"}, + {file = "regex-2023.10.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4cd1bccf99d3ef1ab6ba835308ad85be040e6a11b0977ef7ea8c8005f01a3c29"}, + {file = "regex-2023.10.3-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:81dce2ddc9f6e8f543d94b05d56e70d03a0774d32f6cca53e978dc01e4fc75b8"}, + {file = "regex-2023.10.3-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9c6b4d23c04831e3ab61717a707a5d763b300213db49ca680edf8bf13ab5d91b"}, + {file = "regex-2023.10.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c15ad0aee158a15e17e0495e1e18741573d04eb6da06d8b84af726cfc1ed02ee"}, + {file = "regex-2023.10.3-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6239d4e2e0b52c8bd38c51b760cd870069f0bdf99700a62cd509d7a031749a55"}, + {file = "regex-2023.10.3-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:4a8bf76e3182797c6b1afa5b822d1d5802ff30284abe4599e1247be4fd6b03be"}, + {file = "regex-2023.10.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:d9c727bbcf0065cbb20f39d2b4f932f8fa1631c3e01fcedc979bd4f51fe051c5"}, + {file = 
"regex-2023.10.3-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:3ccf2716add72f80714b9a63899b67fa711b654be3fcdd34fa391d2d274ce767"}, + {file = "regex-2023.10.3-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:107ac60d1bfdc3edb53be75e2a52aff7481b92817cfdddd9b4519ccf0e54a6ff"}, + {file = "regex-2023.10.3-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:00ba3c9818e33f1fa974693fb55d24cdc8ebafcb2e4207680669d8f8d7cca79a"}, + {file = "regex-2023.10.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:f0a47efb1dbef13af9c9a54a94a0b814902e547b7f21acb29434504d18f36e3a"}, + {file = "regex-2023.10.3-cp310-cp310-win32.whl", hash = "sha256:36362386b813fa6c9146da6149a001b7bd063dabc4d49522a1f7aa65b725c7ec"}, + {file = "regex-2023.10.3-cp310-cp310-win_amd64.whl", hash = "sha256:c65a3b5330b54103e7d21cac3f6bf3900d46f6d50138d73343d9e5b2900b2353"}, + {file = "regex-2023.10.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:90a79bce019c442604662d17bf69df99090e24cdc6ad95b18b6725c2988a490e"}, + {file = "regex-2023.10.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:c7964c2183c3e6cce3f497e3a9f49d182e969f2dc3aeeadfa18945ff7bdd7051"}, + {file = "regex-2023.10.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4ef80829117a8061f974b2fda8ec799717242353bff55f8a29411794d635d964"}, + {file = "regex-2023.10.3-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5addc9d0209a9afca5fc070f93b726bf7003bd63a427f65ef797a931782e7edc"}, + {file = "regex-2023.10.3-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c148bec483cc4b421562b4bcedb8e28a3b84fcc8f0aa4418e10898f3c2c0eb9b"}, + {file = "regex-2023.10.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8d1f21af4c1539051049796a0f50aa342f9a27cde57318f2fc41ed50b0dbc4ac"}, + {file = "regex-2023.10.3-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:0b9ac09853b2a3e0d0082104036579809679e7715671cfbf89d83c1cb2a30f58"}, + {file = "regex-2023.10.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:ebedc192abbc7fd13c5ee800e83a6df252bec691eb2c4bedc9f8b2e2903f5e2a"}, + {file = "regex-2023.10.3-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:d8a993c0a0ffd5f2d3bda23d0cd75e7086736f8f8268de8a82fbc4bd0ac6791e"}, + {file = "regex-2023.10.3-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:be6b7b8d42d3090b6c80793524fa66c57ad7ee3fe9722b258aec6d0672543fd0"}, + {file = "regex-2023.10.3-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:4023e2efc35a30e66e938de5aef42b520c20e7eda7bb5fb12c35e5d09a4c43f6"}, + {file = "regex-2023.10.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:0d47840dc05e0ba04fe2e26f15126de7c755496d5a8aae4a08bda4dd8d646c54"}, + {file = "regex-2023.10.3-cp311-cp311-win32.whl", hash = "sha256:9145f092b5d1977ec8c0ab46e7b3381b2fd069957b9862a43bd383e5c01d18c2"}, + {file = "regex-2023.10.3-cp311-cp311-win_amd64.whl", hash = "sha256:b6104f9a46bd8743e4f738afef69b153c4b8b592d35ae46db07fc28ae3d5fb7c"}, + {file = "regex-2023.10.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:bff507ae210371d4b1fe316d03433ac099f184d570a1a611e541923f78f05037"}, + {file = "regex-2023.10.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:be5e22bbb67924dea15039c3282fa4cc6cdfbe0cbbd1c0515f9223186fc2ec5f"}, + {file = "regex-2023.10.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4a992f702c9be9c72fa46f01ca6e18d131906a7180950958f766c2aa294d4b41"}, + {file = "regex-2023.10.3-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7434a61b158be563c1362d9071358f8ab91b8d928728cd2882af060481244c9e"}, + {file = "regex-2023.10.3-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c2169b2dcabf4e608416f7f9468737583ce5f0a6e8677c4efbf795ce81109d7c"}, + {file = "regex-2023.10.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:a9e908ef5889cda4de038892b9accc36d33d72fb3e12c747e2799a0e806ec841"}, + {file = "regex-2023.10.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:12bd4bc2c632742c7ce20db48e0d99afdc05e03f0b4c1af90542e05b809a03d9"}, + {file = "regex-2023.10.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:bc72c231f5449d86d6c7d9cc7cd819b6eb30134bb770b8cfdc0765e48ef9c420"}, + {file = "regex-2023.10.3-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:bce8814b076f0ce5766dc87d5a056b0e9437b8e0cd351b9a6c4e1134a7dfbda9"}, + {file = "regex-2023.10.3-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:ba7cd6dc4d585ea544c1412019921570ebd8a597fabf475acc4528210d7c4a6f"}, + {file = "regex-2023.10.3-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:b0c7d2f698e83f15228ba41c135501cfe7d5740181d5903e250e47f617eb4292"}, + {file = "regex-2023.10.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:5a8f91c64f390ecee09ff793319f30a0f32492e99f5dc1c72bc361f23ccd0a9a"}, + {file = "regex-2023.10.3-cp312-cp312-win32.whl", hash = "sha256:ad08a69728ff3c79866d729b095872afe1e0557251da4abb2c5faff15a91d19a"}, + {file = "regex-2023.10.3-cp312-cp312-win_amd64.whl", hash = "sha256:39cdf8d141d6d44e8d5a12a8569d5a227f645c87df4f92179bd06e2e2705e76b"}, + {file = "regex-2023.10.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:4a3ee019a9befe84fa3e917a2dd378807e423d013377a884c1970a3c2792d293"}, + {file = "regex-2023.10.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:76066d7ff61ba6bf3cb5efe2428fc82aac91802844c022d849a1f0f53820502d"}, + {file = "regex-2023.10.3-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:bfe50b61bab1b1ec260fa7cd91106fa9fece57e6beba05630afe27c71259c59b"}, + {file = "regex-2023.10.3-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9fd88f373cb71e6b59b7fa597e47e518282455c2734fd4306a05ca219a1991b0"}, + {file = 
"regex-2023.10.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b3ab05a182c7937fb374f7e946f04fb23a0c0699c0450e9fb02ef567412d2fa3"}, + {file = "regex-2023.10.3-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:dac37cf08fcf2094159922edc7a2784cfcc5c70f8354469f79ed085f0328ebdf"}, + {file = "regex-2023.10.3-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:e54ddd0bb8fb626aa1f9ba7b36629564544954fff9669b15da3610c22b9a0991"}, + {file = "regex-2023.10.3-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:3367007ad1951fde612bf65b0dffc8fd681a4ab98ac86957d16491400d661302"}, + {file = "regex-2023.10.3-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:16f8740eb6dbacc7113e3097b0a36065a02e37b47c936b551805d40340fb9971"}, + {file = "regex-2023.10.3-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:f4f2ca6df64cbdd27f27b34f35adb640b5d2d77264228554e68deda54456eb11"}, + {file = "regex-2023.10.3-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:39807cbcbe406efca2a233884e169d056c35aa7e9f343d4e78665246a332f597"}, + {file = "regex-2023.10.3-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:7eece6fbd3eae4a92d7c748ae825cbc1ee41a89bb1c3db05b5578ed3cfcfd7cb"}, + {file = "regex-2023.10.3-cp37-cp37m-win32.whl", hash = "sha256:ce615c92d90df8373d9e13acddd154152645c0dc060871abf6bd43809673d20a"}, + {file = "regex-2023.10.3-cp37-cp37m-win_amd64.whl", hash = "sha256:0f649fa32fe734c4abdfd4edbb8381c74abf5f34bc0b3271ce687b23729299ed"}, + {file = "regex-2023.10.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:9b98b7681a9437262947f41c7fac567c7e1f6eddd94b0483596d320092004533"}, + {file = "regex-2023.10.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:91dc1d531f80c862441d7b66c4505cd6ea9d312f01fb2f4654f40c6fdf5cc37a"}, + {file = "regex-2023.10.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:82fcc1f1cc3ff1ab8a57ba619b149b907072e750815c5ba63e7aa2e1163384a4"}, + {file = "regex-2023.10.3-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7979b834ec7a33aafae34a90aad9f914c41fd6eaa8474e66953f3f6f7cbd4368"}, + {file = "regex-2023.10.3-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ef71561f82a89af6cfcbee47f0fabfdb6e63788a9258e913955d89fdd96902ab"}, + {file = "regex-2023.10.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dd829712de97753367153ed84f2de752b86cd1f7a88b55a3a775eb52eafe8a94"}, + {file = "regex-2023.10.3-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:00e871d83a45eee2f8688d7e6849609c2ca2a04a6d48fba3dff4deef35d14f07"}, + {file = "regex-2023.10.3-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:706e7b739fdd17cb89e1fbf712d9dc21311fc2333f6d435eac2d4ee81985098c"}, + {file = "regex-2023.10.3-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:cc3f1c053b73f20c7ad88b0d1d23be7e7b3901229ce89f5000a8399746a6e039"}, + {file = "regex-2023.10.3-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:6f85739e80d13644b981a88f529d79c5bdf646b460ba190bffcaf6d57b2a9863"}, + {file = "regex-2023.10.3-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:741ba2f511cc9626b7561a440f87d658aabb3d6b744a86a3c025f866b4d19e7f"}, + {file = "regex-2023.10.3-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:e77c90ab5997e85901da85131fd36acd0ed2221368199b65f0d11bca44549711"}, + {file = "regex-2023.10.3-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:979c24cbefaf2420c4e377ecd1f165ea08cc3d1fbb44bdc51bccbbf7c66a2cb4"}, + {file = "regex-2023.10.3-cp38-cp38-win32.whl", hash = "sha256:58837f9d221744d4c92d2cf7201c6acd19623b50c643b56992cbd2b745485d3d"}, + {file = "regex-2023.10.3-cp38-cp38-win_amd64.whl", hash = "sha256:c55853684fe08d4897c37dfc5faeff70607a5f1806c8be148f1695be4a63414b"}, + 
{file = "regex-2023.10.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:2c54e23836650bdf2c18222c87f6f840d4943944146ca479858404fedeb9f9af"}, + {file = "regex-2023.10.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:69c0771ca5653c7d4b65203cbfc5e66db9375f1078689459fe196fe08b7b4930"}, + {file = "regex-2023.10.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6ac965a998e1388e6ff2e9781f499ad1eaa41e962a40d11c7823c9952c77123e"}, + {file = "regex-2023.10.3-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1c0e8fae5b27caa34177bdfa5a960c46ff2f78ee2d45c6db15ae3f64ecadde14"}, + {file = "regex-2023.10.3-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6c56c3d47da04f921b73ff9415fbaa939f684d47293f071aa9cbb13c94afc17d"}, + {file = "regex-2023.10.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7ef1e014eed78ab650bef9a6a9cbe50b052c0aebe553fb2881e0453717573f52"}, + {file = "regex-2023.10.3-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d29338556a59423d9ff7b6eb0cb89ead2b0875e08fe522f3e068b955c3e7b59b"}, + {file = "regex-2023.10.3-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:9c6d0ced3c06d0f183b73d3c5920727268d2201aa0fe6d55c60d68c792ff3588"}, + {file = "regex-2023.10.3-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:994645a46c6a740ee8ce8df7911d4aee458d9b1bc5639bc968226763d07f00fa"}, + {file = "regex-2023.10.3-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:66e2fe786ef28da2b28e222c89502b2af984858091675044d93cb50e6f46d7af"}, + {file = "regex-2023.10.3-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:11175910f62b2b8c055f2b089e0fedd694fe2be3941b3e2633653bc51064c528"}, + {file = "regex-2023.10.3-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:06e9abc0e4c9ab4779c74ad99c3fc10d3967d03114449acc2c2762ad4472b8ca"}, + {file = 
"regex-2023.10.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:fb02e4257376ae25c6dd95a5aec377f9b18c09be6ebdefa7ad209b9137b73d48"}, + {file = "regex-2023.10.3-cp39-cp39-win32.whl", hash = "sha256:3b2c3502603fab52d7619b882c25a6850b766ebd1b18de3df23b2f939360e1bd"}, + {file = "regex-2023.10.3-cp39-cp39-win_amd64.whl", hash = "sha256:adbccd17dcaff65704c856bd29951c58a1bd4b2b0f8ad6b826dbd543fe740988"}, + {file = "regex-2023.10.3.tar.gz", hash = "sha256:3fef4f844d2290ee0ba57addcec17eec9e3df73f10a2748485dfd6a3a188cc0f"}, +] + [[package]] name = "requests" version = "2.31.0" @@ -3595,6 +3721,20 @@ pygments = ">=2.13.0,<3.0.0" [package.extras] jupyter = ["ipywidgets (>=7.5.1,<9)"] +[[package]] +name = "rouge" +version = "1.0.1" +description = "Full Python ROUGE Score Implementation (not a wrapper)" +optional = false +python-versions = "*" +files = [ + {file = "rouge-1.0.1-py3-none-any.whl", hash = "sha256:28d118536e8c774dc47d1d15ec266479b4dd0914c4672ce117d4002789bdc644"}, + {file = "rouge-1.0.1.tar.gz", hash = "sha256:12b48346ca47d6bcf3c45061f315452b9ccec0620ee895ec85b7efc3d54aae34"}, +] + +[package.dependencies] +six = "*" + [[package]] name = "rpds-py" version = "0.12.0" @@ -4734,4 +4874,4 @@ multidict = ">=4.0" [metadata] lock-version = "2.0" python-versions = ">=3.10,<3.12" -content-hash = "07ba950dc689a3df001e94e9725801e98fc4b62c32bedb6689929bafb9ea88a4" +content-hash = "3a166f0aa467ce77aaf93efbf749bfc3fc841e4f4c70d205feab42725833c1af" diff --git a/pyproject.toml b/pyproject.toml index b44426d38..20120c69b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,6 +27,9 @@ jupyter = "^1.0.0" requests = "^2.31.0" pytest-xdist = "^3.3.1" langdetect = "^1.0.9" +nltk = "^3.8.1" +pycountry = "^22.3.5" +rouge = "^1.0.1" [tool.poetry.group.dev.dependencies] mypy = "^1.6.1" diff --git a/src/intelligence_layer/core/__init__.py b/src/intelligence_layer/core/__init__.py index 6ad4f74d6..745955a28 100644 --- a/src/intelligence_layer/core/__init__.py +++ 
b/src/intelligence_layer/core/__init__.py @@ -14,7 +14,7 @@ Language, ) from .echo import EchoInput, EchoOutput, EchoTask -from .evaluator import Dataset, Evaluation, Example +from .evaluator import AggregatedEvaluation, Dataset, Evaluation, Evaluator, Example from .explain import Explain, ExplainInput, ExplainOutput from .prompt_template import ( Cursor, diff --git a/src/intelligence_layer/core/detect_language.py b/src/intelligence_layer/core/detect_language.py index 6224eea05..d1ac54759 100644 --- a/src/intelligence_layer/core/detect_language.py +++ b/src/intelligence_layer/core/detect_language.py @@ -1,6 +1,8 @@ -from typing import Mapping, NewType, Optional, Sequence, TypeVar +from dataclasses import dataclass +from typing import Mapping, Optional, Sequence, TypeVar from langdetect import detect_langs # type: ignore +from pycountry import languages # type: ignore from pydantic import BaseModel from intelligence_layer.core.task import Task @@ -11,8 +13,15 @@ class LanguageNotSupportedError(ValueError): """Raised in case language in the input is not compatible with the languages supported in the task""" -Language = NewType("Language", str) -"""A language identified by its `ISO 639-1 code `_.""" +@dataclass(frozen=True) +class Language: + """A language identified by its `ISO 639-1 code `_.""" + + iso_639_1: str + + def get_name(self) -> Optional[str]: + language = languages.get(alpha_2=self.iso_639_1) + return language.name if language else None Config = TypeVar("Config") @@ -22,7 +31,7 @@ def language_config(language: Language, configs: Mapping[Language, Config]) -> C config = configs.get(language) if config is None: raise LanguageNotSupportedError( - f"{language} not in ({', '.join(configs.keys())})" + f"{language.iso_639_1} not in ({', '.join(lang.iso_639_1 for lang in configs.keys())})" ) return config diff --git a/src/intelligence_layer/core/evaluator.py b/src/intelligence_layer/core/evaluator.py index 3b82a3c4d..ffad6850f 100644 --- 
a/src/intelligence_layer/core/evaluator.py +++ b/src/intelligence_layer/core/evaluator.py @@ -1,14 +1,21 @@ from abc import ABC, abstractmethod from concurrent.futures import ThreadPoolExecutor -from typing import Generic, Optional, Sequence, TypeVar +from dataclasses import dataclass +from typing import Generic, Mapping, Optional, Sequence, TypeVar from uuid import uuid4 +import nltk # type: ignore +from nltk.tokenize import RegexpTokenizer # type: ignore +from nltk.translate.bleu_score import sentence_bleu # type: ignore from pydantic import BaseModel, Field +from rouge import Rouge # type: ignore from tqdm import tqdm from intelligence_layer.core.task import Input from intelligence_layer.core.tracer import PydanticSerializable, Tracer +nltk.download("punkt") + ExpectedOutput = TypeVar("ExpectedOutput", bound=PydanticSerializable) Evaluation = TypeVar("Evaluation", bound=PydanticSerializable) AggregatedEvaluation = TypeVar("AggregatedEvaluation", bound=PydanticSerializable) @@ -71,7 +78,7 @@ def evaluate( tracer: Ttracer used for tracing of tasks. expected_output: Output that is expected from the task run with the supplied input. Returns: - Evaluation: interface of the metrics that come from the evaluated task. + Interface of the metrics that come from the evaluated task. """ pass @@ -87,7 +94,7 @@ def evaluate_dataset( dataset: Dataset that will be used to evaluate a task. tracer: tracer used for tracing. Returns: - AggregatedEvaluation: The aggregated results of an evaluation run with a dataset. + The aggregated results of an evaluation run with a dataset. """ with ThreadPoolExecutor(max_workers=10) as executor: evaluations = list( @@ -116,6 +123,76 @@ def aggregate(self, evaluations: Sequence[Evaluation]) -> AggregatedEvaluation: Args: evalautions: The results from running `evaluate_dataset` with a task. Returns: - AggregatedEvaluation: The aggregated results of an evaluation run with a dataset. + The aggregated results of an evaluation run with a dataset. 
""" pass + + +def tokenize(input: str) -> Sequence[str]: + """Splits a string into a list of words. + + Removes non-alphanumeric characters and lowercases the given text. + + Args: + input: String to split. + Returns: + List of words. + """ + tokenizer = RegexpTokenizer(r"\w+") + tokens = tokenizer.tokenize(input.lower()) + assert isinstance(tokens, list) + return tokens + + +def calculate_bleu(hypothesis: str, reference: str) -> float: + """Calculates the BLEU-score for the given hypothesis and reference. + + In the summarization use-case the BLEU-score roughly corresponds to the precision of the generated summary with regard to the expected summary. + + Args: + hypothesis: The generation to be evaluated. + reference: The baseline for the evaluation. + + Returns: + BLEU-score, float between 0 and 1. Where 1 means perfect match and 0 no overlap. + """ + hypothesis_tokens = tokenize(hypothesis) + reference_tokens = tokenize(reference) + bleu_score = sentence_bleu( + references=[reference_tokens], hypothesis=hypothesis_tokens + ) + return bleu_score if isinstance(bleu_score, float) else 0.0 + + +@dataclass +class RougeScores: + precision: float + recall: float + f1: float + + @classmethod + def from_rouge_results(cls, rouge_results: Mapping[str, float]) -> "RougeScores": + return cls( + precision=rouge_results["p"], + recall=rouge_results["r"], + f1=rouge_results["f"], + ) + + +def calculate_rouge(hypothesis: str, reference: str) -> RougeScores: + """Calculates the ROUGE-score for the hypothesis and reference. + + In the summarization use-case the ROUGE-score roughly corresponds to the recall of the generated summary with regard to the expected summary. + + Args: + hypothesis: The generation to be evaluated. + reference: The baseline for the evaluation. + + Returns: + ROUGE-score, which contains precision, recall and f1 metrics, all will be floats between 0 and 1. Where 1 means perfect match and 0 no overlap. 
+ """ + hypothesis = " ".join(tokenize(hypothesis)) + reference = " ".join(tokenize(reference)) + rouge = Rouge() + rouge_scores = rouge.get_scores(hypothesis, reference)[0]["rouge-2"] + return RougeScores.from_rouge_results(rouge_scores) diff --git a/src/intelligence_layer/use_cases/classify/keyword_extract.py b/src/intelligence_layer/use_cases/classify/keyword_extract.py index 261f670c0..2d5a74fa5 100644 --- a/src/intelligence_layer/use_cases/classify/keyword_extract.py +++ b/src/intelligence_layer/use_cases/classify/keyword_extract.py @@ -10,7 +10,7 @@ FewShotExample, FewShotInput, ) -from intelligence_layer.core.detect_language import Language, LanguageNotSupportedError +from intelligence_layer.core.detect_language import Language, language_config from intelligence_layer.core.task import Task from intelligence_layer.core.tracer import TaskSpan @@ -158,11 +158,7 @@ def __init__( def do_run( self, input: KeywordExtractInput, task_span: TaskSpan ) -> KeywordExtractOutput: - config = self._few_shot_configs.get(input.language) - if config is None: - raise LanguageNotSupportedError( - f"{input.language} not in ({', '.join(self._few_shot_configs.keys())})" - ) + config = language_config(input.language, self._few_shot_configs) result = self._few_shot.run( FewShotInput( few_shot_config=config, diff --git a/src/intelligence_layer/use_cases/qa/single_chunk_qa.py b/src/intelligence_layer/use_cases/qa/single_chunk_qa.py index f709da937..226684adb 100644 --- a/src/intelligence_layer/use_cases/qa/single_chunk_qa.py +++ b/src/intelligence_layer/use_cases/qa/single_chunk_qa.py @@ -6,7 +6,7 @@ from intelligence_layer.core.chunk import Chunk from intelligence_layer.core.complete import Instruct, InstructInput, PromptOutput -from intelligence_layer.core.detect_language import Language, LanguageNotSupportedError +from intelligence_layer.core.detect_language import Language, language_config from intelligence_layer.core.prompt_template import PromptWithMetadata from 
intelligence_layer.core.task import Task from intelligence_layer.core.text_highlight import TextHighlight, TextHighlightInput @@ -109,11 +109,7 @@ def do_run( self, input: SingleChunkQaInput, task_span: TaskSpan ) -> SingleChunkQaOutput: - instruction_text = self._instruction_config.get(input.language) - if not instruction_text: - raise LanguageNotSupportedError( - f"{input.language} not in ({', '.join(self._instruction_config.keys())})" - ) + instruction_text = language_config(input.language, self._instruction_config) output = self._generate_answer( Template(instruction_text).render( diff --git a/src/intelligence_layer/use_cases/summarize/summarize.py b/src/intelligence_layer/use_cases/summarize/summarize.py index 54de1295c..5aa5f12a9 100644 --- a/src/intelligence_layer/use_cases/summarize/summarize.py +++ b/src/intelligence_layer/use_cases/summarize/summarize.py @@ -1,9 +1,13 @@ -from typing import Sequence +from statistics import mean +from typing import Sequence, Union from pydantic import BaseModel from intelligence_layer.core.chunk import Chunk from intelligence_layer.core.detect_language import Language +from intelligence_layer.core.evaluator import Evaluator, calculate_bleu, calculate_rouge +from intelligence_layer.core.task import Task +from intelligence_layer.core.tracer import Tracer class LongContextSummarizeInput(BaseModel): @@ -53,3 +57,116 @@ class SingleChunkSummarizeOutput(BaseModel): """ summary: str + + +class SummarizeEvaluation(BaseModel): + """The evaluation of a summarization run. + + Attributes: + bleu: roughly corresponds to precision + rouge: roughly corresponds to recall + output: The actual output from the task run + """ + + bleu: float + rouge: float + output: Union[SingleChunkSummarizeOutput, LongContextSummarizeOutput] + + +class AggregatedSummarizeEvaluation(BaseModel): + """The aggregated evaluation of a summarization implementation against a dataset. 
+ + Attributes: + aggregate_bleu: average over BLEU-scores + aggregate_rouge: average over ROUGE-scores + evaluations: The actual evaluations + """ + + aggregate_bleu: float + aggregate_rouge: float + evaluations: Sequence[SummarizeEvaluation] + + +class SingleChunkSummarizeEvaluator( + Evaluator[ + SingleChunkSummarizeInput, + str, + SummarizeEvaluation, + AggregatedSummarizeEvaluation, + ] +): + def __init__( + self, task: Task[SingleChunkSummarizeInput, SingleChunkSummarizeOutput] + ) -> None: + self.task = task + + def evaluate( + self, + input: SingleChunkSummarizeInput, + tracer: Tracer, + expected_output: str, + ) -> SummarizeEvaluation: + summary = self.task.run(input, tracer) + bleu_score = calculate_bleu(summary.summary, expected_output) + rouge_score = calculate_rouge(summary.summary, expected_output) + + return SummarizeEvaluation( + bleu=bleu_score, rouge=rouge_score.recall, output=summary + ) + + def aggregate( + self, evaluations: Sequence[SummarizeEvaluation] + ) -> AggregatedSummarizeEvaluation: + if len(evaluations) != 0: + bleu_avg = mean(eval.bleu for eval in evaluations) + rouge_avg = mean(eval.rouge for eval in evaluations) + else: + bleu_avg = 0.0 + rouge_avg = 0.0 + return AggregatedSummarizeEvaluation( + aggregate_bleu=bleu_avg, aggregate_rouge=rouge_avg, evaluations=evaluations + ) + + +class LongContextSummarizeEvaluator( + Evaluator[ + LongContextSummarizeInput, + str, + SummarizeEvaluation, + AggregatedSummarizeEvaluation, + ] +): + def __init__( + self, task: Task[LongContextSummarizeInput, LongContextSummarizeOutput] + ) -> None: + self.task = task + + def evaluate( + self, + input: LongContextSummarizeInput, + tracer: Tracer, + expected_output: str, + ) -> SummarizeEvaluation: + output = self.task.run(input, tracer) + joint_summary = " ".join( + partial_summary.summary for partial_summary in output.partial_summaries + ) + bleu_score = calculate_bleu(joint_summary, expected_output) + rouge_score = calculate_rouge(joint_summary, 
expected_output) + + return SummarizeEvaluation( + bleu=bleu_score, rouge=rouge_score.recall, output=output + ) + + def aggregate( + self, evaluations: Sequence[SummarizeEvaluation] + ) -> AggregatedSummarizeEvaluation: + if len(evaluations) != 0: + bleu_avg = mean(eval.bleu for eval in evaluations) + rouge_avg = mean(eval.rouge for eval in evaluations) + else: + bleu_avg = 0.0 + rouge_avg = 0.0 + return AggregatedSummarizeEvaluation( + aggregate_bleu=bleu_avg, aggregate_rouge=rouge_avg, evaluations=evaluations + ) diff --git a/tests/core/test_detect_language.py b/tests/core/test_detect_language.py index f1fc7a52e..840654a33 100644 --- a/tests/core/test_detect_language.py +++ b/tests/core/test_detect_language.py @@ -16,4 +16,4 @@ def test_detect_language_returns_correct_language() -> None: tracer = NoOpTracer() output = task.run(input, tracer) - assert output.best_fit == "en" + assert output.best_fit == Language("en") diff --git a/tests/use_cases/summarize/conftest.py b/tests/use_cases/summarize/conftest.py new file mode 100644 index 000000000..5a84c8f32 --- /dev/null +++ b/tests/use_cases/summarize/conftest.py @@ -0,0 +1,58 @@ +from aleph_alpha_client import Client +from pytest import fixture + +from intelligence_layer.core.chunk import Chunk +from intelligence_layer.use_cases.summarize.long_context_high_compression_summarize import ( + LongContextHighCompressionSummarize, +) +from intelligence_layer.use_cases.summarize.single_chunk_few_shot_summarize import ( + SingleChunkFewShotSummarize, +) + + +@fixture +def single_chunk_few_shot_summarize(client: Client) -> SingleChunkFewShotSummarize: + return SingleChunkFewShotSummarize(client, "luminous-extended", 128) + + +@fixture +def chunk() -> Chunk: + return Chunk( + "The brown bear (Ursus arctos) is a large bear species found across Eurasia and North America.[1][3] In North America, the populations of brown bears are called grizzly bears, while the subspecies that inhabits the Kodiak Islands of Alaska is known as 
the Kodiak bear. It is one of the largest living terrestrial members of the order Carnivora, rivaled in size only by its closest relative, the polar bear (Ursus maritimus), which is much less variable in size and slightly bigger on average.[4][5][6][7][8] The brown bear's range includes parts of Russia, Central Asia, the Himalayas, China, Canada, the United States, Hokkaido, Scandinavia, Finland, the Balkans, the Picos de Europa and the Carpathian region (especially Romania), Iran, Anatolia, and the Caucasus.[1][9] The brown bear is recognized as a national and state animal in several European countries.[10]" + ) + + +@fixture +def long_context_high_compression_summarize( + client: Client, +) -> LongContextHighCompressionSummarize: + return LongContextHighCompressionSummarize(client) + + +@fixture +def long_text() -> str: + return """The brown bear (Ursus arctos) is a large bear species found across Eurasia and North America.[1][3] In North America, the populations of brown bears are called grizzly bears, while the subspecies that inhabits the Kodiak Islands of Alaska is known as the Kodiak bear. It is one of the largest living terrestrial members of the order Carnivora, rivaled in size only by its closest relative, the polar bear (Ursus maritimus), which is much less variable in size and slightly bigger on average.[4][5][6][7][8] The brown bear's range includes parts of Russia, Central Asia, the Himalayas, China, Canada, the United States, Hokkaido, Scandinavia, Finland, the Balkans, the Picos de Europa and the Carpathian region (especially Romania), Iran, Anatolia, and the Caucasus.[1][9] The brown bear is recognized as a national and state animal in several European countries.[10] + +While the brown bear's range has shrunk, and it has faced local extinctions across its wide range, it remains listed as a least concern species by the International Union for Conservation of Nature (IUCN) with a total estimated population in 2017 of 110,000. 
As of 2012, this and the American black bear are the only bear species not classified as threatened by the IUCN, though the large sizes of both bears may be a disadvantage due to increased competition with humans.[1][3][7] Populations that were hunted to extinction in the 19th and 20th centuries are the Atlas bear of North Africa and the Californian, Ungavan[11][12] and Mexican populations of the grizzly bear of North America. Many of the populations in the southern parts of Eurasia are highly endangered as well.[1][13] One of the smaller-bodied forms, the Himalayan brown bear, is critically endangered, occupying only 2% of its former range and threatened by uncontrolled poaching for its body parts.[14] The Marsican brown bear of central Italy is one of several currently isolated populations of the Eurasian brown bear and is believed to have a population of just 50 to 60 bears.[10][15] + +Evolution and taxonomy +The brown bear is sometimes referred to as the bruin, from Middle English. This name originated in the fable History of Reynard the Fox translated by William Caxton from Middle Dutch bruun or bruyn, meaning brown (the color).[16][17] In the mid-19th century United States, the brown bear was termed "Old Ephraim" and sometimes as "Moccasin Joe".[18] + +The scientific name of the brown bear, Ursus arctos, comes from the Latin ursus, meaning "bear",[19] and the Greek ἄρκτος/arktos, also meaning "bear".[20] + +Generalized names and evolution +Brown bears are thought to have evolved from Ursus etruscus in Asia.[21][22] The brown bear, per Kurten (1976), has been stated as "clearly derived from the Asian population of Ursus savini about 800,000 years ago; spread into Europe, to the New World."[23] A genetic analysis indicated that the brown bear lineage diverged from the cave bear species complex approximately 1.2–1.4 million years ago, but did not clarify if U. 
savini persisted as a paraspecies for the brown bear before perishing.[24] The oldest fossils positively identified as from this species occur in China from about 0.5 million years ago. Brown bears entered Europe about 250,000 years ago and North Africa shortly after.[21][25] Brown bear remains from the Pleistocene period are common in the British Isles, where it is thought they might have outcompeted cave bears (Ursus spelaeus). The species entered Alaska 100,000 years ago, though they did not move south until 13,000 years ago.[21] It is speculated that brown bears were unable to migrate south until the extinction of the much larger giant short-faced bear (Arctodus simus).[26][27] + +Several paleontologists suggest the possibility of two separate brown bear migrations. First, the inland brown bears, also known as grizzlies, are thought to stem from narrow-skulled bears which migrated from northern Siberia to central Alaska and the rest of the continent. Moreover, the Kodiak bears descend from broad-skulled bears from Kamchatka, which colonized the Alaskan peninsula. Brown bear fossils discovered in Ontario, Ohio, Kentucky and Labrador show that the species occurred farther east than indicated in historic records.[21] In North America, two types of the subspecies Ursus arctos horribilis are generally recognized—the coastal brown bear and the inland grizzly bear; these two types broadly define the range of sizes of all brown bear subspecies.[13] + +Scientific taxonomy +Main article: Subspecies of brown bear + +Adult female Eurasian brown bear, the nominate subspecies +There are many methods used by scientists to define bear species and subspecies, as no one method is always effective. Brown bear taxonomy and subspecies classification has been described as "formidable and confusing," with few authorities listing the same specific set of subspecies.[28] Genetic testing is now perhaps the most important way to scientifically define brown bear relationships and names. 
Generally, genetic testing uses the word clade rather than species because a genetic test alone cannot define a biological species. Most genetic studies report on how closely related the bears are (or their genetic distance). There are hundreds of obsolete brown bear subspecies, each with its own name, so this can become confusing. Hall (1981) lists 86 different types, and even as many as 90 have been proposed.[29][30] However, recent DNA analysis has identified as few as five main clades which contain all extant brown bears,[31][32] while a 2017 phylogenetic study revealed nine clades, including one representing polar bears.[33] As of 2005, 15 extant or recently extinct subspecies were recognized by the general scientific community.[34][35] + +As well as the exact number of overall brown bear subspecies, its precise relationship to the polar bear also remains in debate. The polar bear is a recent offshoot of the brown bear. The point at which the polar bear diverged from the brown bear is unclear, with estimations based on genetics and fossils ranging from 400,000 to 70,000 years ago, but most recent analysis has indicated that the polar bear split somewhere between 275,000 and 150,000 years ago.[36] Under some definitions, the brown bear can be construed as the paraspecies for the polar bear.[37][38][39][40] + +DNA analysis shows that, apart from recent human-caused population fragmentation,[41] brown bears in North America are generally part of a single interconnected population system, with the exception of the population (or subspecies) in the Kodiak Archipelago, which has probably been isolated since the end of the last Ice Age.[42][43] These data demonstrate that U. a. gyas, U. a. horribilis, U. a. sitkensis and U. a. stikeenensis are not distinct or cohesive groups, and would more accurately be described as ecotypes. 
For example, brown bears in any particular region of the Alaska coast are more closely related to adjacent grizzly bears than to distant populations of brown bears,[44] the morphological distinction seemingly driven by brown bears having access to a rich salmon food source, while grizzly bears live at higher elevation, or further from the coast, where plant material is the base of the diet. The history of the bears of the Alexander Archipelago is unusual in that these island populations carry polar bear DNA, presumably originating from a population of polar bears that was left behind at the end of the Pleistocene, but have since been connected with adjacent mainland populations through movement of males, to the point where their nuclear genomes are now more than 90% of brown bear ancestry.[45] + +Brown bears are apparently divided into five different clades, some of which coexist or co-occur in different regions.[3]""" diff --git a/tests/use_cases/summarize/test_long_context_high_compression_summarize.py b/tests/use_cases/summarize/test_long_context_high_compression_summarize.py index 330c70ab4..666df8a04 100644 --- a/tests/use_cases/summarize/test_long_context_high_compression_summarize.py +++ b/tests/use_cases/summarize/test_long_context_high_compression_summarize.py @@ -1,6 +1,3 @@ -from aleph_alpha_client import Client -from pytest import fixture - from intelligence_layer.core.tracer import NoOpTracer from intelligence_layer.use_cases.summarize.long_context_high_compression_summarize import ( LongContextHighCompressionSummarize, @@ -8,42 +5,6 @@ from intelligence_layer.use_cases.summarize.summarize import LongContextSummarizeInput -@fixture -def long_context_high_compression_summarize( - client: Client, -) -> LongContextHighCompressionSummarize: - return LongContextHighCompressionSummarize(client) - - -@fixture -def long_text() -> str: - return """The brown bear (Ursus arctos) is a large bear species found across Eurasia and North America.[1][3] In North 
America, the populations of brown bears are called grizzly bears, while the subspecies that inhabits the Kodiak Islands of Alaska is known as the Kodiak bear. It is one of the largest living terrestrial members of the order Carnivora, rivaled in size only by its closest relative, the polar bear (Ursus maritimus), which is much less variable in size and slightly bigger on average.[4][5][6][7][8] The brown bear's range includes parts of Russia, Central Asia, the Himalayas, China, Canada, the United States, Hokkaido, Scandinavia, Finland, the Balkans, the Picos de Europa and the Carpathian region (especially Romania), Iran, Anatolia, and the Caucasus.[1][9] The brown bear is recognized as a national and state animal in several European countries.[10] - -While the brown bear's range has shrunk, and it has faced local extinctions across its wide range, it remains listed as a least concern species by the International Union for Conservation of Nature (IUCN) with a total estimated population in 2017 of 110,000. As of 2012, this and the American black bear are the only bear species not classified as threatened by the IUCN, though the large sizes of both bears may be a disadvantage due to increased competition with humans.[1][3][7] Populations that were hunted to extinction in the 19th and 20th centuries are the Atlas bear of North Africa and the Californian, Ungavan[11][12] and Mexican populations of the grizzly bear of North America. 
Many of the populations in the southern parts of Eurasia are highly endangered as well.[1][13] One of the smaller-bodied forms, the Himalayan brown bear, is critically endangered, occupying only 2% of its former range and threatened by uncontrolled poaching for its body parts.[14] The Marsican brown bear of central Italy is one of several currently isolated populations of the Eurasian brown bear and is believed to have a population of just 50 to 60 bears.[10][15] - -Evolution and taxonomy -The brown bear is sometimes referred to as the bruin, from Middle English. This name originated in the fable History of Reynard the Fox translated by William Caxton from Middle Dutch bruun or bruyn, meaning brown (the color).[16][17] In the mid-19th century United States, the brown bear was termed "Old Ephraim" and sometimes as "Moccasin Joe".[18] - -The scientific name of the brown bear, Ursus arctos, comes from the Latin ursus, meaning "bear",[19] and the Greek ἄρκτος/arktos, also meaning "bear".[20] - -Generalized names and evolution -Brown bears are thought to have evolved from Ursus etruscus in Asia.[21][22] The brown bear, per Kurten (1976), has been stated as "clearly derived from the Asian population of Ursus savini about 800,000 years ago; spread into Europe, to the New World."[23] A genetic analysis indicated that the brown bear lineage diverged from the cave bear species complex approximately 1.2–1.4 million years ago, but did not clarify if U. savini persisted as a paraspecies for the brown bear before perishing.[24] The oldest fossils positively identified as from this species occur in China from about 0.5 million years ago. Brown bears entered Europe about 250,000 years ago and North Africa shortly after.[21][25] Brown bear remains from the Pleistocene period are common in the British Isles, where it is thought they might have outcompeted cave bears (Ursus spelaeus). 
The species entered Alaska 100,000 years ago, though they did not move south until 13,000 years ago.[21] It is speculated that brown bears were unable to migrate south until the extinction of the much larger giant short-faced bear (Arctodus simus).[26][27] - -Several paleontologists suggest the possibility of two separate brown bear migrations. First, the inland brown bears, also known as grizzlies, are thought to stem from narrow-skulled bears which migrated from northern Siberia to central Alaska and the rest of the continent. Moreover, the Kodiak bears descend from broad-skulled bears from Kamchatka, which colonized the Alaskan peninsula. Brown bear fossils discovered in Ontario, Ohio, Kentucky and Labrador show that the species occurred farther east than indicated in historic records.[21] In North America, two types of the subspecies Ursus arctos horribilis are generally recognized—the coastal brown bear and the inland grizzly bear; these two types broadly define the range of sizes of all brown bear subspecies.[13] - -Scientific taxonomy -Main article: Subspecies of brown bear - -Adult female Eurasian brown bear, the nominate subspecies -There are many methods used by scientists to define bear species and subspecies, as no one method is always effective. Brown bear taxonomy and subspecies classification has been described as "formidable and confusing," with few authorities listing the same specific set of subspecies.[28] Genetic testing is now perhaps the most important way to scientifically define brown bear relationships and names. Generally, genetic testing uses the word clade rather than species because a genetic test alone cannot define a biological species. Most genetic studies report on how closely related the bears are (or their genetic distance). There are hundreds of obsolete brown bear subspecies, each with its own name, so this can become confusing. 
Hall (1981) lists 86 different types, and even as many as 90 have been proposed.[29][30] However, recent DNA analysis has identified as few as five main clades which contain all extant brown bears,[31][32] while a 2017 phylogenetic study revealed nine clades, including one representing polar bears.[33] As of 2005, 15 extant or recently extinct subspecies were recognized by the general scientific community.[34][35] - -As well as the exact number of overall brown bear subspecies, its precise relationship to the polar bear also remains in debate. The polar bear is a recent offshoot of the brown bear. The point at which the polar bear diverged from the brown bear is unclear, with estimations based on genetics and fossils ranging from 400,000 to 70,000 years ago, but most recent analysis has indicated that the polar bear split somewhere between 275,000 and 150,000 years ago.[36] Under some definitions, the brown bear can be construed as the paraspecies for the polar bear.[37][38][39][40] - -DNA analysis shows that, apart from recent human-caused population fragmentation,[41] brown bears in North America are generally part of a single interconnected population system, with the exception of the population (or subspecies) in the Kodiak Archipelago, which has probably been isolated since the end of the last Ice Age.[42][43] These data demonstrate that U. a. gyas, U. a. horribilis, U. a. sitkensis and U. a. stikeenensis are not distinct or cohesive groups, and would more accurately be described as ecotypes. For example, brown bears in any particular region of the Alaska coast are more closely related to adjacent grizzly bears than to distant populations of brown bears,[44] the morphological distinction seemingly driven by brown bears having access to a rich salmon food source, while grizzly bears live at higher elevation, or further from the coast, where plant material is the base of the diet. 
The history of the bears of the Alexander Archipelago is unusual in that these island populations carry polar bear DNA, presumably originating from a population of polar bears that was left behind at the end of the Pleistocene, but have since been connected with adjacent mainland populations through movement of males, to the point where their nuclear genomes are now more than 90% of brown bear ancestry.[45] - -Brown bears are apparently divided into five different clades, some of which coexist or co-occur in different regions.[3]""" - - def test_long_context_high_compression_summarize_en( long_context_high_compression_summarize: LongContextHighCompressionSummarize, long_text: str, diff --git a/tests/use_cases/summarize/test_single_chunk_few_shot_summarize.py b/tests/use_cases/summarize/test_single_chunk_few_shot_summarize.py index 5f019883d..b5cf18ee2 100644 --- a/tests/use_cases/summarize/test_single_chunk_few_shot_summarize.py +++ b/tests/use_cases/summarize/test_single_chunk_few_shot_summarize.py @@ -1,6 +1,3 @@ -from aleph_alpha_client import Client -from pytest import fixture - from intelligence_layer.core.chunk import Chunk from intelligence_layer.core.detect_language import Language from intelligence_layer.core.tracer import NoOpTracer @@ -10,33 +7,22 @@ from intelligence_layer.use_cases.summarize.summarize import SingleChunkSummarizeInput -@fixture -def single_chunk_few_shot_summarize(client: Client) -> SingleChunkFewShotSummarize: - return SingleChunkFewShotSummarize(client, "luminous-extended", 128) - - def test_high_compression_summarize_en( - single_chunk_few_shot_summarize: SingleChunkFewShotSummarize, + single_chunk_few_shot_summarize: SingleChunkFewShotSummarize, chunk: Chunk ) -> None: - text = Chunk( - "The brown bear (Ursus arctos) is a large bear species found across Eurasia and North America.[1][3] In North America, the populations of brown bears are called grizzly bears, while the subspecies that inhabits the Kodiak Islands of Alaska is known 
as the Kodiak bear. It is one of the largest living terrestrial members of the order Carnivora, rivaled in size only by its closest relative, the polar bear (Ursus maritimus), which is much less variable in size and slightly bigger on average.[4][5][6][7][8] The brown bear's range includes parts of Russia, Central Asia, the Himalayas, China, Canada, the United States, Hokkaido, Scandinavia, Finland, the Balkans, the Picos de Europa and the Carpathian region (especially Romania), Iran, Anatolia, and the Caucasus.[1][9] The brown bear is recognized as a national and state animal in several European countries.[10]" - ) - input = SingleChunkSummarizeInput(chunk=text, language=Language("en")) + input = SingleChunkSummarizeInput(chunk=chunk, language=Language("en")) output = single_chunk_few_shot_summarize.run(input, NoOpTracer()) assert output.summary assert "bear" in output.summary.lower() - assert len(output.summary) < len(text) + assert len(output.summary) < len(chunk) def test_high_compression_summarize_is_language_sensitive( - single_chunk_few_shot_summarize: SingleChunkFewShotSummarize, + single_chunk_few_shot_summarize: SingleChunkFewShotSummarize, chunk: Chunk ) -> None: - text = Chunk( - "The brown bear (Ursus arctos) is a large bear species found across Eurasia and North America.[1][3] In North America, the populations of brown bears are called grizzly bears, while the subspecies that inhabits the Kodiak Islands of Alaska is known as the Kodiak bear. 
It is one of the largest living terrestrial members of the order Carnivora, rivaled in size only by its closest relative, the polar bear (Ursus maritimus), which is much less variable in size and slightly bigger on average.[4][5][6][7][8] The brown bear's range includes parts of Russia, Central Asia, the Himalayas, China, Canada, the United States, Hokkaido, Scandinavia, Finland, the Balkans, the Picos de Europa and the Carpathian region (especially Romania), Iran, Anatolia, and the Caucasus.[1][9] The brown bear is recognized as a national and state animal in several European countries.[10]" - ) - input_en = SingleChunkSummarizeInput(chunk=text, language=Language("en")) - input_de = SingleChunkSummarizeInput(chunk=text, language=Language("de")) + input_en = SingleChunkSummarizeInput(chunk=chunk, language=Language("en")) + input_de = SingleChunkSummarizeInput(chunk=chunk, language=Language("de")) output_en, output_de = single_chunk_few_shot_summarize.run_concurrently( [input_en, input_de], NoOpTracer(), concurrency_limit=2 ) diff --git a/tests/use_cases/summarize/test_summarize.py b/tests/use_cases/summarize/test_summarize.py new file mode 100644 index 000000000..e346f7b08 --- /dev/null +++ b/tests/use_cases/summarize/test_summarize.py @@ -0,0 +1,92 @@ +from pytest import fixture + +from intelligence_layer.core.chunk import Chunk +from intelligence_layer.core.detect_language import Language +from intelligence_layer.core.evaluator import Dataset, Example +from intelligence_layer.core.tracer import NoOpTracer +from intelligence_layer.use_cases.summarize.long_context_high_compression_summarize import ( + LongContextHighCompressionSummarize, +) +from intelligence_layer.use_cases.summarize.single_chunk_few_shot_summarize import ( + SingleChunkFewShotSummarize, +) +from intelligence_layer.use_cases.summarize.summarize import ( + LongContextSummarizeEvaluator, + LongContextSummarizeInput, + SingleChunkSummarizeEvaluator, + SingleChunkSummarizeInput, +) + + +@fixture +def 
single_chunk_summarize_evaluator( + single_chunk_few_shot_summarize: SingleChunkFewShotSummarize, +) -> SingleChunkSummarizeEvaluator: + return SingleChunkSummarizeEvaluator(single_chunk_few_shot_summarize) + + +@fixture +def long_context_summarize_evaluator( + long_context_high_compression_summarize: LongContextHighCompressionSummarize, +) -> LongContextSummarizeEvaluator: + return LongContextSummarizeEvaluator(long_context_high_compression_summarize) + + +def test_single_chunk_summarize_evaluator( + single_chunk_summarize_evaluator: SingleChunkSummarizeEvaluator, + chunk: Chunk, + no_op_tracer: NoOpTracer, +) -> None: + input = SingleChunkSummarizeInput(chunk=chunk, language=Language("en")) + bad_expected_output = "Heute ist das Wetter schön." + good_expected_output = ( + "The brown bear is a large mammal that lives in Eurasia and North America." + ) + outputs = [bad_expected_output, good_expected_output] + dataset = Dataset( + name="summarize_eval_test", + examples=[Example(input=input, expected_output=output) for output in outputs], + ) + aggregated_evaluation = single_chunk_summarize_evaluator.evaluate_dataset( + dataset, no_op_tracer + ) + + assert len(aggregated_evaluation.evaluations) == len(outputs) + assert ( + aggregated_evaluation.evaluations[0].bleu + < aggregated_evaluation.evaluations[1].bleu + ) + assert ( + aggregated_evaluation.evaluations[0].rouge + < aggregated_evaluation.evaluations[1].rouge + ) + + +def test_long_context_summarize_evaluator( + long_context_summarize_evaluator: LongContextSummarizeEvaluator, + long_text: str, + no_op_tracer: NoOpTracer, +) -> None: + input = LongContextSummarizeInput(text=long_text, language=Language("en")) + bad_expected_output = "Heute ist das Wetter schön." + good_expected_output = ( + "The brown bear is a large mammal that lives in Eurasia and North America." 
+ ) + outputs = [bad_expected_output, good_expected_output] + dataset = Dataset( + name="summarize_eval_test", + examples=[Example(input=input, expected_output=output) for output in outputs], + ) + aggregated_evaluation = long_context_summarize_evaluator.evaluate_dataset( + dataset, no_op_tracer + ) + + assert len(aggregated_evaluation.evaluations) == len(outputs) + assert ( + aggregated_evaluation.evaluations[0].bleu + < aggregated_evaluation.evaluations[1].bleu + ) + assert ( + aggregated_evaluation.evaluations[0].rouge + < aggregated_evaluation.evaluations[1].rouge + )