diff --git a/api/Pipfile b/api/Pipfile index 46029cd9d8b..753a89574bc 100644 --- a/api/Pipfile +++ b/api/Pipfile @@ -27,22 +27,20 @@ django-cors-headers = "~=4.2" django-log-request-id = "~=2.0" django-oauth-toolkit = "~=2.3" django-redis = "~=5.3" -django-sslserver = "~=0.22" django-storages = "~=1.13" django-tqdm = "~=1.3" django-uuslug = "~=2.0" djangorestframework = "~=3.14" drf-spectacular = "*" -elasticsearch-dsl = "~=7.4" +elasticsearch = "==8.8.2" +elasticsearch-dsl = "~=8.9" future = "~=0.18" gunicorn = "~=21.2" limit = "~=0.2" -Pillow = "~=10.0" +Pillow = "~=10.0.1" psycopg2 = "~=2.9" python-decouple = "~=3.8" python-xmp-toolkit = "~=2.0" -redlock-py = "~=1.0" -requests-oauthlib = "~=1.3" sentry-sdk = "~=1.30" django-split-settings = "*" diff --git a/api/Pipfile.lock b/api/Pipfile.lock index 8cbec45d370..c17ebed89a7 100644 --- a/api/Pipfile.lock +++ b/api/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "ca6ec379595b089de6af8a4fc4eeb7e0075a5bbf424715be5b9e4f723ed9d54f" + "sha256": "695fb28026f336e8706c5085bf0afb67566cb77da71894dbdf9b3565bdc75c27" }, "pipfile-spec": 6, "requires": { @@ -151,19 +151,19 @@ }, "boto3": { "hashes": [ - "sha256:c53c92dfe22489ba31e918c2e7b59ff43e2e778bd3d3559e62351a739382bb5c", - "sha256:eea3b07e0f28c9f92bccab972af24a3b0dd951c69d93da75227b8ecd3e18f6c4" + "sha256:519639859a3c829ccf7073a58b3716cb26cb5906e306fe63eb4beab68bf9bfab", + "sha256:c9fad1b01a1d7e7bd51150b3175b4c32b79d699ce94708082611f59fde2e097a" ], "index": "pypi", - "version": "==1.28.44" + "version": "==1.28.49" }, "botocore": { "hashes": [ - "sha256:83d61c1ca781e6ede19fcc4d5dd73004eee3825a2b220f0d7727e32069209d98", - "sha256:84f90919fecb4a4f417fd10145c8a87ff2c4b14d6381cd34d9babf02110b3315" + "sha256:7d64cb45154e4f34f3a45f551e118caad7379ae831565639e0afe5b2af126c61", + "sha256:95e9716f27f67d4207f260ab0ea157603ca544d3b82c5f21728b1c732bec1817" ], "markers": "python_version >= '3.7'", - "version": "==1.31.44" + "version": "==1.31.49" }, "certifi": { "hashes": 
[ @@ -354,11 +354,11 @@ }, "deepdiff": { "hashes": [ - "sha256:065cdbbe62f66447cf507b32351579ffcc4a80bb28f567ac27e92a21ddca99f9", - "sha256:744c4e54ff83eaa77a995b3311dccdce6ee67773335a34a5ef269fa048005457" + "sha256:080b1359d6128f3f5f1738c6be3064f0ad9b0cc41994aa90a028065f6ad11f25", + "sha256:acdc1651a3e802415e0337b7e1192df5cd7c17b72fbab480466fdd799b9a72e7" ], "index": "pypi", - "version": "==6.4.1" + "version": "==6.5.0" }, "deprecated": { "hashes": [ @@ -416,13 +416,6 @@ "index": "pypi", "version": "==1.2.0" }, - "django-sslserver": { - "hashes": [ - "sha256:c598a363d2ccdc2421c08ddb3d8b0973f80e8e47a3a5b74e4a2896f21c2947c5" - ], - "index": "pypi", - "version": "==0.22" - }, "django-storages": { "hashes": [ "sha256:11280a883b13812df548f3cfe9c10280afc0d4727c8babdee369a75e71158f16", @@ -462,21 +455,29 @@ "index": "pypi", "version": "==0.26.4" }, + "elastic-transport": { + "hashes": [ + "sha256:19db271ab79c9f70f8c43f8f5b5111408781a6176b54ab2e54d713b6d9ceb815", + "sha256:b9ad708ceb7fcdbc6b30a96f886609a109f042c0b9d9f2e44403b3133ba7ff10" + ], + "markers": "python_version >= '3.6'", + "version": "==8.4.0" + }, "elasticsearch": { "hashes": [ - "sha256:0e2454645dc00517dee4c6de3863411a9c5f1955d013c5fefa29123dadc92f98", - "sha256:66c4ece2adfe7cc120e2b6a6798a1fd5c777aecf82eec39bb95cef7cfc7ea2b3" + "sha256:bed8cf8fcc6c3be7c254b579de4c29afab021f373c832246f912d37aef3c6bd5", + "sha256:bffd6ce4faaacf90e6f617241773b3da8fb94e2e83554f5508e2fab92ca79643" ], - "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3' and python_version < '4'", - "version": "==7.17.9" + "index": "pypi", + "version": "==8.8.2" }, "elasticsearch-dsl": { "hashes": [ - "sha256:07ee9c87dc28cc3cae2daa19401e1e18a172174ad9e5ca67938f752e3902a1d5", - "sha256:97f79239a252be7c4cce554c29e64695d7ef6a4828372316a5e5ff815e7a7498" + "sha256:66410adf881f02b8a032e8a5b2a3ee093fdeede4b814fbf04c0f6ce0499b7472", + "sha256:ab266bcf84b0f23bd2d73d9b31e054b5d38b20279cf076c53873f46b6dabf747" ], "index": 
"pypi", - "version": "==7.4.1" + "version": "==8.9.0" }, "frozenlist": { "hashes": [ @@ -720,65 +721,63 @@ }, "pillow": { "hashes": [ - "sha256:00e65f5e822decd501e374b0650146063fbb30a7264b4d2744bdd7b913e0cab5", - "sha256:040586f7d37b34547153fa383f7f9aed68b738992380ac911447bb78f2abe530", - "sha256:0b6eb5502f45a60a3f411c63187db83a3d3107887ad0d036c13ce836f8a36f1d", - "sha256:1ce91b6ec08d866b14413d3f0bbdea7e24dfdc8e59f562bb77bc3fe60b6144ca", - "sha256:1f62406a884ae75fb2f818694469519fb685cc7eaff05d3451a9ebe55c646891", - "sha256:22c10cc517668d44b211717fd9775799ccec4124b9a7f7b3635fc5386e584992", - "sha256:3400aae60685b06bb96f99a21e1ada7bc7a413d5f49bce739828ecd9391bb8f7", - "sha256:349930d6e9c685c089284b013478d6f76e3a534e36ddfa912cde493f235372f3", - "sha256:368ab3dfb5f49e312231b6f27b8820c823652b7cd29cfbd34090565a015e99ba", - "sha256:38250a349b6b390ee6047a62c086d3817ac69022c127f8a5dc058c31ccef17f3", - "sha256:3a684105f7c32488f7153905a4e3015a3b6c7182e106fe3c37fbb5ef3e6994c3", - "sha256:3a82c40d706d9aa9734289740ce26460a11aeec2d9c79b7af87bb35f0073c12f", - "sha256:3b08d4cc24f471b2c8ca24ec060abf4bebc6b144cb89cba638c720546b1cf538", - "sha256:3ed64f9ca2f0a95411e88a4efbd7a29e5ce2cea36072c53dd9d26d9c76f753b3", - "sha256:3f07ea8d2f827d7d2a49ecf1639ec02d75ffd1b88dcc5b3a61bbb37a8759ad8d", - "sha256:520f2a520dc040512699f20fa1c363eed506e94248d71f85412b625026f6142c", - "sha256:5c6e3df6bdd396749bafd45314871b3d0af81ff935b2d188385e970052091017", - "sha256:608bfdee0d57cf297d32bcbb3c728dc1da0907519d1784962c5f0c68bb93e5a3", - "sha256:685ac03cc4ed5ebc15ad5c23bc555d68a87777586d970c2c3e216619a5476223", - "sha256:76de421f9c326da8f43d690110f0e79fe3ad1e54be811545d7d91898b4c8493e", - "sha256:76edb0a1fa2b4745fb0c99fb9fb98f8b180a1bbceb8be49b087e0b21867e77d3", - "sha256:7be600823e4c8631b74e4a0d38384c73f680e6105a7d3c6824fcf226c178c7e6", - "sha256:81ff539a12457809666fef6624684c008e00ff6bf455b4b89fd00a140eecd640", - "sha256:88af2003543cc40c80f6fca01411892ec52b11021b3dc22ec3bc9d5afd1c5334", - 
"sha256:8c11160913e3dd06c8ffdb5f233a4f254cb449f4dfc0f8f4549eda9e542c93d1", - "sha256:8f8182b523b2289f7c415f589118228d30ac8c355baa2f3194ced084dac2dbba", - "sha256:9211e7ad69d7c9401cfc0e23d49b69ca65ddd898976d660a2fa5904e3d7a9baa", - "sha256:92be919bbc9f7d09f7ae343c38f5bb21c973d2576c1d45600fce4b74bafa7ac0", - "sha256:9c82b5b3e043c7af0d95792d0d20ccf68f61a1fec6b3530e718b688422727396", - "sha256:9f7c16705f44e0504a3a2a14197c1f0b32a95731d251777dcb060aa83022cb2d", - "sha256:9fb218c8a12e51d7ead2a7c9e101a04982237d4855716af2e9499306728fb485", - "sha256:a74ba0c356aaa3bb8e3eb79606a87669e7ec6444be352870623025d75a14a2bf", - "sha256:b4f69b3700201b80bb82c3a97d5e9254084f6dd5fb5b16fc1a7b974260f89f43", - "sha256:bc2ec7c7b5d66b8ec9ce9f720dbb5fa4bace0f545acd34870eff4a369b44bf37", - "sha256:c189af0545965fa8d3b9613cfdb0cd37f9d71349e0f7750e1fd704648d475ed2", - "sha256:c1fbe7621c167ecaa38ad29643d77a9ce7311583761abf7836e1510c580bf3dd", - "sha256:c7cf14a27b0d6adfaebb3ae4153f1e516df54e47e42dcc073d7b3d76111a8d86", - "sha256:c9f72a021fbb792ce98306ffb0c348b3c9cb967dce0f12a49aa4c3d3fdefa967", - "sha256:cd25d2a9d2b36fcb318882481367956d2cf91329f6892fe5d385c346c0649629", - "sha256:ce543ed15570eedbb85df19b0a1a7314a9c8141a36ce089c0a894adbfccb4568", - "sha256:ce7b031a6fc11365970e6a5686d7ba8c63e4c1cf1ea143811acbb524295eabed", - "sha256:d35e3c8d9b1268cbf5d3670285feb3528f6680420eafe35cccc686b73c1e330f", - "sha256:d50b6aec14bc737742ca96e85d6d0a5f9bfbded018264b3b70ff9d8c33485551", - "sha256:d5d0dae4cfd56969d23d94dc8e89fb6a217be461c69090768227beb8ed28c0a3", - "sha256:d5db32e2a6ccbb3d34d87c87b432959e0db29755727afb37290e10f6e8e62614", - "sha256:d72e2ecc68a942e8cf9739619b7f408cc7b272b279b56b2c83c6123fcfa5cdff", - "sha256:d737a602fbd82afd892ca746392401b634e278cb65d55c4b7a8f48e9ef8d008d", - "sha256:d80cf684b541685fccdd84c485b31ce73fc5c9b5d7523bf1394ce134a60c6883", - "sha256:db24668940f82321e746773a4bc617bfac06ec831e5c88b643f91f122a785684", - "sha256:dbc02381779d412145331789b40cc7b11fdf449e5d94f6bc0b080db0a56ea3f0", 
- "sha256:dffe31a7f47b603318c609f378ebcd57f1554a3a6a8effbc59c3c69f804296de", - "sha256:edf4392b77bdc81f36e92d3a07a5cd072f90253197f4a52a55a8cec48a12483b", - "sha256:efe8c0681042536e0d06c11f48cebe759707c9e9abf880ee213541c5b46c5bf3", - "sha256:f31f9fdbfecb042d046f9d91270a0ba28368a723302786c0009ee9b9f1f60199", - "sha256:f88a0b92277de8e3ca715a0d79d68dc82807457dae3ab8699c758f07c20b3c51", - "sha256:faaf07ea35355b01a35cb442dd950d8f1bb5b040a7787791a535de13db15ed90" + "sha256:0462b1496505a3462d0f35dc1c4d7b54069747d65d00ef48e736acda2c8cbdff", + "sha256:186f7e04248103482ea6354af6d5bcedb62941ee08f7f788a1c7707bc720c66f", + "sha256:19e9adb3f22d4c416e7cd79b01375b17159d6990003633ff1d8377e21b7f1b21", + "sha256:28444cb6ad49726127d6b340217f0627abc8732f1194fd5352dec5e6a0105635", + "sha256:2872f2d7846cf39b3dbff64bc1104cc48c76145854256451d33c5faa55c04d1a", + "sha256:2cc6b86ece42a11f16f55fe8903595eff2b25e0358dec635d0a701ac9586588f", + "sha256:2d7e91b4379f7a76b31c2dda84ab9e20c6220488e50f7822e59dac36b0cd92b1", + "sha256:2fa6dd2661838c66f1a5473f3b49ab610c98a128fc08afbe81b91a1f0bf8c51d", + "sha256:32bec7423cdf25c9038fef614a853c9d25c07590e1a870ed471f47fb80b244db", + "sha256:3855447d98cced8670aaa63683808df905e956f00348732448b5a6df67ee5849", + "sha256:3a04359f308ebee571a3127fdb1bd01f88ba6f6fb6d087f8dd2e0d9bff43f2a7", + "sha256:3a0d3e54ab1df9df51b914b2233cf779a5a10dfd1ce339d0421748232cea9876", + "sha256:44e7e4587392953e5e251190a964675f61e4dae88d1e6edbe9f36d6243547ff3", + "sha256:459307cacdd4138edee3875bbe22a2492519e060660eaf378ba3b405d1c66317", + "sha256:4ce90f8a24e1c15465048959f1e94309dfef93af272633e8f37361b824532e91", + "sha256:50bd5f1ebafe9362ad622072a1d2f5850ecfa44303531ff14353a4059113b12d", + "sha256:522ff4ac3aaf839242c6f4e5b406634bfea002469656ae8358644fc6c4856a3b", + "sha256:552912dbca585b74d75279a7570dd29fa43b6d93594abb494ebb31ac19ace6bd", + "sha256:5d6c9049c6274c1bb565021367431ad04481ebb54872edecfcd6088d27edd6ed", + 
"sha256:697a06bdcedd473b35e50a7e7506b1d8ceb832dc238a336bd6f4f5aa91a4b500", + "sha256:71671503e3015da1b50bd18951e2f9daf5b6ffe36d16f1eb2c45711a301521a7", + "sha256:723bd25051454cea9990203405fa6b74e043ea76d4968166dfd2569b0210886a", + "sha256:764d2c0daf9c4d40ad12fbc0abd5da3af7f8aa11daf87e4fa1b834000f4b6b0a", + "sha256:787bb0169d2385a798888e1122c980c6eff26bf941a8ea79747d35d8f9210ca0", + "sha256:7f771e7219ff04b79e231d099c0a28ed83aa82af91fd5fa9fdb28f5b8d5addaf", + "sha256:847e8d1017c741c735d3cd1883fa7b03ded4f825a6e5fcb9378fd813edee995f", + "sha256:84efb46e8d881bb06b35d1d541aa87f574b58e87f781cbba8d200daa835b42e1", + "sha256:898f1d306298ff40dc1b9ca24824f0488f6f039bc0e25cfb549d3195ffa17088", + "sha256:8b451d6ead6e3500b6ce5c7916a43d8d8d25ad74b9102a629baccc0808c54971", + "sha256:8f06be50669087250f319b706decf69ca71fdecd829091a37cc89398ca4dc17a", + "sha256:92a23b0431941a33242b1f0ce6c88a952e09feeea9af4e8be48236a68ffe2205", + "sha256:93139acd8109edcdeffd85e3af8ae7d88b258b3a1e13a038f542b79b6d255c54", + "sha256:98533fd7fa764e5f85eebe56c8e4094db912ccbe6fbf3a58778d543cadd0db08", + "sha256:9f665d1e6474af9f9da5e86c2a3a2d2d6204e04d5af9c06b9d42afa6ebde3f21", + "sha256:b059ac2c4c7a97daafa7dc850b43b2d3667def858a4f112d1aa082e5c3d6cf7d", + "sha256:b1be1c872b9b5fcc229adeadbeb51422a9633abd847c0ff87dc4ef9bb184ae08", + "sha256:b7cf63d2c6928b51d35dfdbda6f2c1fddbe51a6bc4a9d4ee6ea0e11670dd981e", + "sha256:bc2e3069569ea9dbe88d6b8ea38f439a6aad8f6e7a6283a38edf61ddefb3a9bf", + "sha256:bcf1207e2f2385a576832af02702de104be71301c2696d0012b1b93fe34aaa5b", + "sha256:ca26ba5767888c84bf5a0c1a32f069e8204ce8c21d00a49c90dabeba00ce0145", + "sha256:cbe68deb8580462ca0d9eb56a81912f59eb4542e1ef8f987405e35a0179f4ea2", + "sha256:d6caf3cd38449ec3cd8a68b375e0c6fe4b6fd04edb6c9766b55ef84a6e8ddf2d", + "sha256:d72967b06be9300fed5cfbc8b5bafceec48bf7cdc7dab66b1d2549035287191d", + "sha256:d889b53ae2f030f756e61a7bff13684dcd77e9af8b10c6048fb2c559d6ed6eaf", + "sha256:de596695a75496deb3b499c8c4f8e60376e0516e1a774e7bc046f0f48cd620ad", 
+ "sha256:e6a90167bcca1216606223a05e2cf991bb25b14695c518bc65639463d7db722d", + "sha256:ed2d9c0704f2dc4fa980b99d565c0c9a543fe5101c25b3d60488b8ba80f0cce1", + "sha256:ee7810cf7c83fa227ba9125de6084e5e8b08c59038a7b2c9045ef4dde61663b4", + "sha256:f0b4b06da13275bc02adfeb82643c4a6385bd08d26f03068c2796f60d125f6f2", + "sha256:f11c9102c56ffb9ca87134bd025a43d2aba3f1155f508eff88f694b33a9c6d19", + "sha256:f5bb289bb835f9fe1a1e9300d011eef4d69661bb9b34d5e196e5e82c4cb09b37", + "sha256:f6d3d4c905e26354e8f9d82548475c46d8e0889538cb0657aa9c6f0872a37aa4", + "sha256:fcb59711009b0168d6ee0bd8fb5eb259c4ab1717b2f538bbf36bacf207ef7a68", + "sha256:fd2a5403a75b54661182b75ec6132437a181209b901446ee5724b589af8edef1" ], "index": "pypi", - "version": "==10.0.0" + "version": "==10.0.1" }, "psycopg2": { "hashes": [ @@ -906,13 +905,6 @@ "markers": "python_version >= '3.7'", "version": "==5.0.0" }, - "redlock-py": { - "hashes": [ - "sha256:0b8722c4843ddeabc2fc1dd37c05859e0da29fbce3bd1f6ecc73c98396f139ac" - ], - "index": "pypi", - "version": "==1.0.8" - }, "referencing": { "hashes": [ "sha256:449b6669b6121a9e96a7f9e410b245d471e8d48964c67113ce9afe50c8dd7bdf", @@ -929,116 +921,108 @@ "markers": "python_version >= '3.7'", "version": "==2.31.0" }, - "requests-oauthlib": { - "hashes": [ - "sha256:2577c501a2fb8d05a304c09d090d6e47c306fef15809d102b327cf8364bddab5", - "sha256:75beac4a47881eeb94d5ea5d6ad31ef88856affe2332b9aafb52c6452ccf0d7a" - ], - "index": "pypi", - "version": "==1.3.1" - }, "rpds-py": { "hashes": [ - "sha256:00e97d43a36811b78fa9ad9d3329bf34f76a31e891a7031a2ac01450c9b168ab", - "sha256:013d6c784150d10236a74b4094a79d96a256b814457e388fc5a4ba9efe24c402", - "sha256:0188b580c490bccb031e9b67e9e8c695a3c44ac5e06218b152361eca847317c3", - "sha256:02945ae38fd78efc40900f509890de84cfd5ffe2cd2939eeb3a8800dc68b87cb", - "sha256:02b4a2e28eb24dac4ef43dda4f6a6f7766e355179b143f7d0c76a1c5488a307b", - "sha256:0527c97dcd8bb983822ee31d3760187083fd3ba18ac4dd22cf5347c89d5628f4", - 
"sha256:05a1382905026bdd560f806c8c7c16e0f3e3fb359ba8868203ca6e5799884968", - "sha256:0b309908b6ff5ffbf6394818cb73b5a2a74073acee2c57fe8719046389aeff0d", - "sha256:0fc625059b83695fbb4fc8b7a8b66fa94ff9c7b78c84fb9986cd53ff88a28d80", - "sha256:177c033e467a66a054dd3a9534167234a3d0b2e41445807b13b626e01da25d92", - "sha256:18909093944727e068ebfc92e2e6ed1c4fa44135507c1c0555213ce211c53214", - "sha256:1adb04e4b4e41bf30aaa77eeb169c1b9ba9e5010e2e6ce8d6c17e1446edc9b68", - "sha256:1ed3d5385d14be894e12a9033be989e012214a9811e7194849c94032ad69682a", - "sha256:203eb1532d51591d32e8dfafd60b5d31347ea7278c8da02b4b550287f6abe28b", - "sha256:213f9ef5c02ec2f883c1075d25a873149daadbaea50d18d622e9db55ec9849c2", - "sha256:2275f1a022e2383da5d2d101fe11ccdcbae799148c4b83260a4b9309fa3e1fc2", - "sha256:22e6de18f00583f06928cc8d0993104ecc62f7c6da6478db2255de89a30e45d1", - "sha256:289073f68452b96e70990085324be7223944c7409973d13ddfe0eea1c1b5663b", - "sha256:29ec8507664f94cc08457d98cfc41c3cdbddfa8952438e644177a29b04937876", - "sha256:2a55631b93e47956fbc97d69ba2054a8c6a4016f9a3064ec4e031f5f1030cb90", - "sha256:2a86d246a160d98d820ee7d02dc18c923c228de095be362e57b9fd8970b2c4a1", - "sha256:2bca97521ee786087f0c5ef318fef3eef0266a9c3deff88205523cf353af7394", - "sha256:2c8fc6c841ada60a86d29c9ebe2e8757c47eda6553f3596c560e59ca6e9b6fa1", - "sha256:2cd0c9fb5d40887500b4ed818770c68ab4fa6e0395d286f9704be6751b1b7d98", - "sha256:2d27d08056fcd61ff47a0cd8407eff4d3e816c82cb6b9c6f0ce9a0ad49225f81", - "sha256:2ffbf1b38c88d0466de542e91b08225d51782282512f8e2b11715126c41fda48", - "sha256:3fd503c27e7b7034128e30847ecdb4bff4ca5e60f29ad022a9f66ae8940d54ac", - "sha256:3ff1f585a0fdc1415bd733b804f33d386064a308672249b14828130dd43e7c31", - "sha256:41bd430b7b63aa802c02964e331ac0b177148fef5f807d2c90d05ce71a52b4d4", - "sha256:43e9b1531d6a898bdf086acb75c41265c7ec4331267d7619148d407efc72bd24", - "sha256:46af4a742b90c7460e94214f923452c2c1d050a9da1d2b8d4c70cbc045e692b7", - "sha256:46c4c550bf59ce05d6bff2c98053822549aaf9fbaf81103edea325e03350bca1", 
- "sha256:4969592e3cdeefa4cbb15a26cec102cbd4a1d6e5b695fac9fa026e19741138c8", - "sha256:4a0536ed2b9297c75104e1a3da330828ba1b2639fa53b38d396f98bf7e3c68df", - "sha256:4a96147791e49e84207dd1530109aa0e9eeaf1c8b7a59f150047fc0fcdf9bb64", - "sha256:4c7f9d70f99e1fbcbf57c75328b80e1c0a7f6cad43e75efa90a97221be5efe15", - "sha256:4e8474f7233fe1949ce4e03bea698a600c2d5d6b51dab6d6e6336dbe69acf23e", - "sha256:4f1b804cfad04f862d6a84af9d1ad941b06f671878f0f7ecad6c92007d423de6", - "sha256:529aab727f54a937085184e7436e1d0e19975cf10115eda12d37a683e4ee5342", - "sha256:5612b0b1de8d5114520094bd5fc3d04eb8af6f3e10d48ef05b7c8e77c1fd9545", - "sha256:56777c57246e048908b550af9b81b0ec9cf804fd47cb7502ccd93238bd6025c2", - "sha256:56ba7c1100ed079527f2b995bf5486a2e557e6d5b733c52e8947476338815b69", - "sha256:59d222086daa55421d599609b32d0ebe544e57654c4a0a1490c54a7ebaa67561", - "sha256:5aba767e64b494483ad60c4873bec78d16205a21f8247c99749bd990d9c846c2", - "sha256:5d5eaf988951f6ecb6854ca3300b87123599c711183c83da7ce39717a7cbdbce", - "sha256:73da69e1f612c3e682e34dcb971272d90d6f27b2c99acff444ca455a89978574", - "sha256:75c8766734ac0053e1d683567e65e85306c4ec62631b0591caeb287ac8f72e08", - "sha256:75eea40355a8690459c7291ce6c8ce39c27bd223675c7da6619f510c728feb97", - "sha256:80c3cf46511653f94dfe07c7c79ab105c4164d6e1dfcb35b7214fb9af53eaef4", - "sha256:8557c807388e6617161fe51b1a4747ea8d1133f2d2ad8e79583439abebe58fbd", - "sha256:89438e8885a186c69fe31f7ef98bb2bf29688c466c3caf9060f404c0be89ae80", - "sha256:899b03a3be785a7e1ff84b237da71f0efa2f021512f147dd34ffdf7aa82cb678", - "sha256:8de9b88f0cbac73cfed34220d13c57849e62a7099a714b929142425e926d223a", - "sha256:8f4d561f4728f825e3b793a53064b606ca0b6fc264f67d09e54af452aafc5b82", - "sha256:907b214da5d2fcff0b6ddb83de1333890ca92abaf4bbf8d9c61dc1b95c87fd6e", - "sha256:9118de88c16947eaf5b92f749e65b0501ea69e7c2be7bd6aefc12551622360e1", - "sha256:9568764e72d85cf7855ca78b48e07ed1be47bf230e2cea8dabda3c95f660b0ff", - 
"sha256:9c74cbee9e532dc34371127f7686d6953e5153a1f22beab7f953d95ee4a0fe09", - "sha256:9cdfd649011ce2d90cb0dd304c5aba1190fac0c266d19a9e2b96b81cfd150a09", - "sha256:9f00d54b18dd837f1431d66b076737deb7c29ce3ebb8412ceaf44d5e1954ac0c", - "sha256:a2f416cdfe92f5fbb77177f5f3f7830059d1582db05f2c7119bf80069d1ab69b", - "sha256:a4cb372e22e9c879bd9a9cc9b20b7c1fbf30a605ac953da45ecec05d8a6e1c77", - "sha256:a65de5c02884760a14a58304fb6303f9ddfc582e630f385daea871e1bdb18686", - "sha256:aa3b3a43dabc4cc57a7800f526cbe03f71c69121e21b863fdf497b59b462b163", - "sha256:ab0f7aabdbce4a202e013083eeab71afdb85efa405dc4a06fea98cde81204675", - "sha256:abe081453166e206e3a8c6d8ace57214c17b6d9477d7601ac14a365344dbc1f4", - "sha256:ae141c9017f8f473a6ee07a9425da021816a9f8c0683c2e5442f0ccf56b0fc62", - "sha256:af52078719209bef33e38131486fd784832dd8d1dc9b85f00a44f6e7437dd021", - "sha256:b00150a9a3fd0a8efaa90bc2696c105b04039d50763dd1c95a34c88c5966cb57", - "sha256:b2660000e1a113869c86eb5cc07f3343467490f3cd9d0299f81da9ddae7137b7", - "sha256:b3eb1a0d2b6d232d1bcdfc3fcc5f7b004ab3fbd9203011a3172f051d4527c0b6", - "sha256:b589d93a60e78fe55d5bc76ee8c2bf945dbdbb7cd16044c53e0307604e448de1", - "sha256:b8578fc6c8bdd0201327503720fa581000b4bd3934abbf07e2628d1ad3de157d", - "sha256:ba1b28e44f611f3f2b436bd8290050a61db4b59a8e24be4465f44897936b3824", - "sha256:bb44644371eaa29a3aba7b69b1862d0d56f073bb7585baa32e4271a71a91ee82", - "sha256:bcde80aefe7054fad6277762fb7e9d35c72ea479a485ae1bb14629c640987b30", - "sha256:bd1142d22fdb183a0fff66d79134bf644401437fed874f81066d314c67ee193c", - "sha256:bf77f9017fcfa1232f98598a637406e6c33982ccba8a5922339575c3e2b90ea5", - "sha256:c2772bb95062e3f9774140205cd65d8997e39620715486cf5f843cf4ad8f744c", - "sha256:c4ecc4e9a5d73a816cae36ee6b5d8b7a0c72013cae1e101406e832887c3dc2d8", - "sha256:c86231c66e4f422e7c13ea6200bb4048b3016c8bfd11b4fd0dabd04d2c8e3501", - "sha256:c8f6526df47953b07c45b95c4d1da6b9a0861c0e5da0271db96bb1d807825412", - "sha256:ccfb77f6dc8abffa6f1c7e3975ed9070a41ce5fcc11154d2bead8c1baa940f09", 
- "sha256:d9d7efaad48b859053b90dedd69bc92f2095084251e732e4c57ac9726bcb1e64", - "sha256:dd91a7d7a9ce7f4983097c91ce211f3e5569cc21caa16f2692298a07e396f82b", - "sha256:de4a2fd524993578fe093044f291b4b24aab134390030b3b9b5f87fd41ab7e75", - "sha256:df61f818edf7c8626bfa392f825860fb670b5f8336e238eb0ec7e2a5689cdded", - "sha256:e1147bc3d0dd1e549d991110d0a09557ec9f925dbc1ca62871fcdab2ec9d716b", - "sha256:e1954f4b239d1a92081647eecfd51cbfd08ea16eb743b8af1cd0113258feea14", - "sha256:e281b71922208e00886e4b7ffbfcf27874486364f177418ab676f102130e7ec9", - "sha256:e69737bd56006a86fd5a78b2b85447580a6138c930a75eb9ef39fe03d90782b1", - "sha256:e82b4a70cc67094f3f3fd77579702f48fcf1de7bdc67d79b8f1e24d089a6162c", - "sha256:e92e5817eb6bfed23aa5e45bfe30647b83602bdd6f9e25d63524d4e6258458b0", - "sha256:eaba0613c759ebf95988a84f766ca6b7432d55ce399194f95dde588ad1be0878", - "sha256:edd74b760a6bb950397e7a7bd2f38e6700f6525062650b1d77c6d851b82f02c2", - "sha256:f40abbcc0a7d9a8a80870af839d317e6932533f98682aabd977add6c53beeb23", - "sha256:fce7a8ee8d0f682c953c0188735d823f0fcb62779bf92cd6ba473a8e730e26ad" + "sha256:015de2ce2af1586ff5dc873e804434185199a15f7d96920ce67e50604592cae9", + "sha256:061c3ff1f51ecec256e916cf71cc01f9975af8fb3af9b94d3c0cc8702cfea637", + "sha256:08a80cf4884920863623a9ee9a285ee04cef57ebedc1cc87b3e3e0f24c8acfe5", + "sha256:09362f86ec201288d5687d1dc476b07bf39c08478cde837cb710b302864e7ec9", + "sha256:0bb4f48bd0dd18eebe826395e6a48b7331291078a879295bae4e5d053be50d4c", + "sha256:106af1653007cc569d5fbb5f08c6648a49fe4de74c2df814e234e282ebc06957", + "sha256:11fdd1192240dda8d6c5d18a06146e9045cb7e3ba7c06de6973000ff035df7c6", + "sha256:16a472300bc6c83fe4c2072cc22b3972f90d718d56f241adabc7ae509f53f154", + "sha256:176287bb998fd1e9846a9b666e240e58f8d3373e3bf87e7642f15af5405187b8", + "sha256:177914f81f66c86c012311f8c7f46887ec375cfcfd2a2f28233a3053ac93a569", + "sha256:177c9dd834cdf4dc39c27436ade6fdf9fe81484758885f2d616d5d03c0a83bd2", + 
"sha256:187700668c018a7e76e89424b7c1042f317c8df9161f00c0c903c82b0a8cac5c", + "sha256:1d9b5ee46dcb498fa3e46d4dfabcb531e1f2e76b477e0d99ef114f17bbd38453", + "sha256:22da15b902f9f8e267020d1c8bcfc4831ca646fecb60254f7bc71763569f56b1", + "sha256:24cd91a03543a0f8d09cb18d1cb27df80a84b5553d2bd94cba5979ef6af5c6e7", + "sha256:255f1a10ae39b52122cce26ce0781f7a616f502feecce9e616976f6a87992d6b", + "sha256:271c360fdc464fe6a75f13ea0c08ddf71a321f4c55fc20a3fe62ea3ef09df7d9", + "sha256:2ed83d53a8c5902ec48b90b2ac045e28e1698c0bea9441af9409fc844dc79496", + "sha256:2f3e1867dd574014253b4b8f01ba443b9c914e61d45f3674e452a915d6e929a3", + "sha256:35fbd23c1c8732cde7a94abe7fb071ec173c2f58c0bd0d7e5b669fdfc80a2c7b", + "sha256:37d0c59548ae56fae01c14998918d04ee0d5d3277363c10208eef8c4e2b68ed6", + "sha256:39d05e65f23a0fe897b6ac395f2a8d48c56ac0f583f5d663e0afec1da89b95da", + "sha256:3ad59efe24a4d54c2742929001f2d02803aafc15d6d781c21379e3f7f66ec842", + "sha256:3aed39db2f0ace76faa94f465d4234aac72e2f32b009f15da6492a561b3bbebd", + "sha256:3bbac1953c17252f9cc675bb19372444aadf0179b5df575ac4b56faaec9f6294", + "sha256:40bc802a696887b14c002edd43c18082cb7b6f9ee8b838239b03b56574d97f71", + "sha256:42f712b4668831c0cd85e0a5b5a308700fe068e37dcd24c0062904c4e372b093", + "sha256:448a66b8266de0b581246ca7cd6a73b8d98d15100fb7165974535fa3b577340e", + "sha256:485301ee56ce87a51ccb182a4b180d852c5cb2b3cb3a82f7d4714b4141119d8c", + "sha256:485747ee62da83366a44fbba963c5fe017860ad408ccd6cd99aa66ea80d32b2e", + "sha256:4cf0855a842c5b5c391dd32ca273b09e86abf8367572073bd1edfc52bc44446b", + "sha256:4eca20917a06d2fca7628ef3c8b94a8c358f6b43f1a621c9815243462dcccf97", + "sha256:4ed172d0c79f156c1b954e99c03bc2e3033c17efce8dd1a7c781bc4d5793dfac", + "sha256:5267cfda873ad62591b9332fd9472d2409f7cf02a34a9c9cb367e2c0255994bf", + "sha256:52b5cbc0469328e58180021138207e6ec91d7ca2e037d3549cc9e34e2187330a", + "sha256:53d7a3cd46cdc1689296348cb05ffd4f4280035770aee0c8ead3bbd4d6529acc", + "sha256:563646d74a4b4456d0cf3b714ca522e725243c603e8254ad85c3b59b7c0c4bf0", 
+ "sha256:570cc326e78ff23dec7f41487aa9c3dffd02e5ee9ab43a8f6ccc3df8f9327623", + "sha256:5aca759ada6b1967fcfd4336dcf460d02a8a23e6abe06e90ea7881e5c22c4de6", + "sha256:5de11c041486681ce854c814844f4ce3282b6ea1656faae19208ebe09d31c5b8", + "sha256:5e271dd97c7bb8eefda5cca38cd0b0373a1fea50f71e8071376b46968582af9b", + "sha256:642ed0a209ced4be3a46f8cb094f2d76f1f479e2a1ceca6de6346a096cd3409d", + "sha256:6446002739ca29249f0beaaf067fcbc2b5aab4bc7ee8fb941bd194947ce19aff", + "sha256:691d50c99a937709ac4c4cd570d959a006bd6a6d970a484c84cc99543d4a5bbb", + "sha256:69b857a7d8bd4f5d6e0db4086da8c46309a26e8cefdfc778c0c5cc17d4b11e08", + "sha256:6ac3fefb0d168c7c6cab24fdfc80ec62cd2b4dfd9e65b84bdceb1cb01d385c33", + "sha256:6c9141af27a4e5819d74d67d227d5047a20fa3c7d4d9df43037a955b4c748ec5", + "sha256:7170cbde4070dc3c77dec82abf86f3b210633d4f89550fa0ad2d4b549a05572a", + "sha256:763ad59e105fca09705d9f9b29ecffb95ecdc3b0363be3bb56081b2c6de7977a", + "sha256:77076bdc8776a2b029e1e6ffbe6d7056e35f56f5e80d9dc0bad26ad4a024a762", + "sha256:7cd020b1fb41e3ab7716d4d2c3972d4588fdfbab9bfbbb64acc7078eccef8860", + "sha256:821392559d37759caa67d622d0d2994c7a3f2fb29274948ac799d496d92bca73", + "sha256:829e91f3a8574888b73e7a3feb3b1af698e717513597e23136ff4eba0bc8387a", + "sha256:850c272e0e0d1a5c5d73b1b7871b0a7c2446b304cec55ccdb3eaac0d792bb065", + "sha256:87d9b206b1bd7a0523375dc2020a6ce88bca5330682ae2fe25e86fd5d45cea9c", + "sha256:8bd01ff4032abaed03f2db702fa9a61078bee37add0bd884a6190b05e63b028c", + "sha256:8d54bbdf5d56e2c8cf81a1857250f3ea132de77af543d0ba5dce667183b61fec", + "sha256:8efaeb08ede95066da3a3e3c420fcc0a21693fcd0c4396d0585b019613d28515", + "sha256:8f94fdd756ba1f79f988855d948ae0bad9ddf44df296770d9a58c774cfbcca72", + "sha256:95cde244e7195b2c07ec9b73fa4c5026d4a27233451485caa1cd0c1b55f26dbd", + "sha256:975382d9aa90dc59253d6a83a5ca72e07f4ada3ae3d6c0575ced513db322b8ec", + "sha256:9dd9d9d9e898b9d30683bdd2b6c1849449158647d1049a125879cb397ee9cd12", + 
"sha256:a019a344312d0b1f429c00d49c3be62fa273d4a1094e1b224f403716b6d03be1", + "sha256:a4d9bfda3f84fc563868fe25ca160c8ff0e69bc4443c5647f960d59400ce6557", + "sha256:a657250807b6efd19b28f5922520ae002a54cb43c2401e6f3d0230c352564d25", + "sha256:a771417c9c06c56c9d53d11a5b084d1de75de82978e23c544270ab25e7c066ff", + "sha256:aad6ed9e70ddfb34d849b761fb243be58c735be6a9265b9060d6ddb77751e3e8", + "sha256:ae87137951bb3dc08c7d8bfb8988d8c119f3230731b08a71146e84aaa919a7a9", + "sha256:af247fd4f12cca4129c1b82090244ea5a9d5bb089e9a82feb5a2f7c6a9fe181d", + "sha256:b5d4bdd697195f3876d134101c40c7d06d46c6ab25159ed5cbd44105c715278a", + "sha256:b9255e7165083de7c1d605e818025e8860636348f34a79d84ec533546064f07e", + "sha256:c22211c165166de6683de8136229721f3d5c8606cc2c3d1562da9a3a5058049c", + "sha256:c55f9821f88e8bee4b7a72c82cfb5ecd22b6aad04033334f33c329b29bfa4da0", + "sha256:c7aed97f2e676561416c927b063802c8a6285e9b55e1b83213dfd99a8f4f9e48", + "sha256:cd2163f42868865597d89399a01aa33b7594ce8e2c4a28503127c81a2f17784e", + "sha256:ce5e7504db95b76fc89055c7f41e367eaadef5b1d059e27e1d6eabf2b55ca314", + "sha256:cff7351c251c7546407827b6a37bcef6416304fc54d12d44dbfecbb717064717", + "sha256:d27aa6bbc1f33be920bb7adbb95581452cdf23005d5611b29a12bb6a3468cc95", + "sha256:d3b52a67ac66a3a64a7e710ba629f62d1e26ca0504c29ee8cbd99b97df7079a8", + "sha256:de61e424062173b4f70eec07e12469edde7e17fa180019a2a0d75c13a5c5dc57", + "sha256:e10e6a1ed2b8661201e79dff5531f8ad4cdd83548a0f81c95cf79b3184b20c33", + "sha256:e1a0ffc39f51aa5f5c22114a8f1906b3c17eba68c5babb86c5f77d8b1bba14d1", + "sha256:e22491d25f97199fc3581ad8dd8ce198d8c8fdb8dae80dea3512e1ce6d5fa99f", + "sha256:e626b864725680cd3904414d72e7b0bd81c0e5b2b53a5b30b4273034253bb41f", + "sha256:e8c71ea77536149e36c4c784f6d420ffd20bea041e3ba21ed021cb40ce58e2c9", + "sha256:e8d0f0eca087630d58b8c662085529781fd5dc80f0a54eda42d5c9029f812599", + "sha256:ea65b59882d5fa8c74a23f8960db579e5e341534934f43f3b18ec1839b893e41", + "sha256:ea93163472db26ac6043e8f7f93a05d9b59e0505c760da2a3cd22c7dd7111391", 
+ "sha256:eab75a8569a095f2ad470b342f2751d9902f7944704f0571c8af46bede438475", + "sha256:ed8313809571a5463fd7db43aaca68ecb43ca7a58f5b23b6e6c6c5d02bdc7882", + "sha256:ef5fddfb264e89c435be4adb3953cef5d2936fdeb4463b4161a6ba2f22e7b740", + "sha256:ef750a20de1b65657a1425f77c525b0183eac63fe7b8f5ac0dd16f3668d3e64f", + "sha256:efb9ece97e696bb56e31166a9dd7919f8f0c6b31967b454718c6509f29ef6fee", + "sha256:f4c179a7aeae10ddf44c6bac87938134c1379c49c884529f090f9bf05566c836", + "sha256:f602881d80ee4228a2355c68da6b296a296cd22bbb91e5418d54577bbf17fa7c", + "sha256:fc2200e79d75b5238c8d69f6a30f8284290c777039d331e7340b6c17cad24a5a", + "sha256:fcc1ebb7561a3e24a6588f7c6ded15d80aec22c66a070c757559b57b17ffd1cb" ], "markers": "python_version >= '3.8'", - "version": "==0.10.2" + "version": "==0.10.3" }, "s3transfer": { "hashes": [ @@ -1050,11 +1034,11 @@ }, "sentry-sdk": { "hashes": [ - "sha256:2e53ad63f96bb9da6570ba2e755c267e529edcf58580a2c0d2a11ef26e1e678b", - "sha256:7dc873b87e1faf4d00614afd1058bfa1522942f33daef8a59f90de8ed75cd10c" + "sha256:64a7141005fb775b9db298a30de93e3b83e0ddd1232dc6f36eb38aebc1553291", + "sha256:6de2e88304873484207fed836388e422aeff000609b104c802749fd89d56ba5b" ], "index": "pypi", - "version": "==1.30.0" + "version": "==1.31.0" }, "six": { "hashes": [ @@ -1087,14 +1071,6 @@ "markers": "python_version >= '3.7'", "version": "==4.66.1" }, - "typing-extensions": { - "hashes": [ - "sha256:440d5dd3af93b060174bf433bccd69b0babc3b15b1a8dca43789fd7f61514b36", - "sha256:b75ddc264f0ba5615db7ba217daeb99701ad295353c45f9e95963337ceeeffb2" - ], - "markers": "python_version < '3.11'", - "version": "==4.7.1" - }, "uritemplate": { "hashes": [ "sha256:4346edfc5c3b79f694bccd6d6099a322bbeb628dbf2cd86eea55a456ce5124f0", @@ -1274,14 +1250,6 @@ } }, "develop": { - "appnope": { - "hashes": [ - "sha256:02bd91c4de869fbb1e1c50aafc4098827a7a54ab2f39d9dcba6c9547ed920e24", - "sha256:265a455292d0bd8a72453494fa24df5a11eb18373a60c7c0430889f22548605e" - ], - "markers": "sys_platform == 'darwin'", - 
"version": "==0.1.3" - }, "asttokens": { "hashes": [ "sha256:2e0171b991b2c959acc6c49318049236844a5da1d65ba2672c4880c1c894834e", @@ -1289,14 +1257,6 @@ ], "version": "==2.4.0" }, - "async-timeout": { - "hashes": [ - "sha256:4640d96be84d82d02ed59ea2b7105a0f7b33abe8703703cd0ab0bf87c427522f", - "sha256:7405140ff1230c310e51dc27b3145b9092d659ce68ff733fb0cefe3ee42be028" - ], - "markers": "python_version >= '3.7'", - "version": "==4.0.3" - }, "attrs": { "hashes": [ "sha256:1f28b4522cdc2fb4256ac1a020c78acf9cba2c6b461ccd2c126f3aa8e8335d04", @@ -1348,14 +1308,6 @@ "markers": "python_version >= '3.5'", "version": "==5.1.1" }, - "exceptiongroup": { - "hashes": [ - "sha256:097acd85d473d75af5bb98e41b61ff7fe35efe6675e4f9370ec6ec5126d160e9", - "sha256:343280667a4585d195ca1cf9cef84a4e178c4b6cf2274caef9859782b567d5e3" - ], - "markers": "python_version < '3.11'", - "version": "==1.1.3" - }, "executing": { "hashes": [ "sha256:0314a69e37426e3608aada02473b4161d4caf5a4b244d1d0c48072b8fee7bacc", @@ -1373,11 +1325,11 @@ }, "faker": { "hashes": [ - "sha256:7cf705758f6cc5dd31f628e323f306a6d881e9a8a103f1e32e5f30a4cad0974c", - "sha256:d79d5ea59f31e00fbb882546840a4adb2fd0bae99b103db1ba5869f176bc530b" + "sha256:5d6b7880b3bea708075ddf91938424453f07053a59f8fa0453c1870df6ff3292", + "sha256:64c8513c53c3a809075ee527b323a0ba61517814123f3137e4912f5d43350139" ], "markers": "python_version >= '3.8'", - "version": "==19.6.0" + "version": "==19.6.1" }, "fakeredis": { "hashes": [ @@ -1666,106 +1618,106 @@ }, "rpds-py": { "hashes": [ - "sha256:00e97d43a36811b78fa9ad9d3329bf34f76a31e891a7031a2ac01450c9b168ab", - "sha256:013d6c784150d10236a74b4094a79d96a256b814457e388fc5a4ba9efe24c402", - "sha256:0188b580c490bccb031e9b67e9e8c695a3c44ac5e06218b152361eca847317c3", - "sha256:02945ae38fd78efc40900f509890de84cfd5ffe2cd2939eeb3a8800dc68b87cb", - "sha256:02b4a2e28eb24dac4ef43dda4f6a6f7766e355179b143f7d0c76a1c5488a307b", - "sha256:0527c97dcd8bb983822ee31d3760187083fd3ba18ac4dd22cf5347c89d5628f4", - 
"sha256:05a1382905026bdd560f806c8c7c16e0f3e3fb359ba8868203ca6e5799884968", - "sha256:0b309908b6ff5ffbf6394818cb73b5a2a74073acee2c57fe8719046389aeff0d", - "sha256:0fc625059b83695fbb4fc8b7a8b66fa94ff9c7b78c84fb9986cd53ff88a28d80", - "sha256:177c033e467a66a054dd3a9534167234a3d0b2e41445807b13b626e01da25d92", - "sha256:18909093944727e068ebfc92e2e6ed1c4fa44135507c1c0555213ce211c53214", - "sha256:1adb04e4b4e41bf30aaa77eeb169c1b9ba9e5010e2e6ce8d6c17e1446edc9b68", - "sha256:1ed3d5385d14be894e12a9033be989e012214a9811e7194849c94032ad69682a", - "sha256:203eb1532d51591d32e8dfafd60b5d31347ea7278c8da02b4b550287f6abe28b", - "sha256:213f9ef5c02ec2f883c1075d25a873149daadbaea50d18d622e9db55ec9849c2", - "sha256:2275f1a022e2383da5d2d101fe11ccdcbae799148c4b83260a4b9309fa3e1fc2", - "sha256:22e6de18f00583f06928cc8d0993104ecc62f7c6da6478db2255de89a30e45d1", - "sha256:289073f68452b96e70990085324be7223944c7409973d13ddfe0eea1c1b5663b", - "sha256:29ec8507664f94cc08457d98cfc41c3cdbddfa8952438e644177a29b04937876", - "sha256:2a55631b93e47956fbc97d69ba2054a8c6a4016f9a3064ec4e031f5f1030cb90", - "sha256:2a86d246a160d98d820ee7d02dc18c923c228de095be362e57b9fd8970b2c4a1", - "sha256:2bca97521ee786087f0c5ef318fef3eef0266a9c3deff88205523cf353af7394", - "sha256:2c8fc6c841ada60a86d29c9ebe2e8757c47eda6553f3596c560e59ca6e9b6fa1", - "sha256:2cd0c9fb5d40887500b4ed818770c68ab4fa6e0395d286f9704be6751b1b7d98", - "sha256:2d27d08056fcd61ff47a0cd8407eff4d3e816c82cb6b9c6f0ce9a0ad49225f81", - "sha256:2ffbf1b38c88d0466de542e91b08225d51782282512f8e2b11715126c41fda48", - "sha256:3fd503c27e7b7034128e30847ecdb4bff4ca5e60f29ad022a9f66ae8940d54ac", - "sha256:3ff1f585a0fdc1415bd733b804f33d386064a308672249b14828130dd43e7c31", - "sha256:41bd430b7b63aa802c02964e331ac0b177148fef5f807d2c90d05ce71a52b4d4", - "sha256:43e9b1531d6a898bdf086acb75c41265c7ec4331267d7619148d407efc72bd24", - "sha256:46af4a742b90c7460e94214f923452c2c1d050a9da1d2b8d4c70cbc045e692b7", - "sha256:46c4c550bf59ce05d6bff2c98053822549aaf9fbaf81103edea325e03350bca1", 
- "sha256:4969592e3cdeefa4cbb15a26cec102cbd4a1d6e5b695fac9fa026e19741138c8", - "sha256:4a0536ed2b9297c75104e1a3da330828ba1b2639fa53b38d396f98bf7e3c68df", - "sha256:4a96147791e49e84207dd1530109aa0e9eeaf1c8b7a59f150047fc0fcdf9bb64", - "sha256:4c7f9d70f99e1fbcbf57c75328b80e1c0a7f6cad43e75efa90a97221be5efe15", - "sha256:4e8474f7233fe1949ce4e03bea698a600c2d5d6b51dab6d6e6336dbe69acf23e", - "sha256:4f1b804cfad04f862d6a84af9d1ad941b06f671878f0f7ecad6c92007d423de6", - "sha256:529aab727f54a937085184e7436e1d0e19975cf10115eda12d37a683e4ee5342", - "sha256:5612b0b1de8d5114520094bd5fc3d04eb8af6f3e10d48ef05b7c8e77c1fd9545", - "sha256:56777c57246e048908b550af9b81b0ec9cf804fd47cb7502ccd93238bd6025c2", - "sha256:56ba7c1100ed079527f2b995bf5486a2e557e6d5b733c52e8947476338815b69", - "sha256:59d222086daa55421d599609b32d0ebe544e57654c4a0a1490c54a7ebaa67561", - "sha256:5aba767e64b494483ad60c4873bec78d16205a21f8247c99749bd990d9c846c2", - "sha256:5d5eaf988951f6ecb6854ca3300b87123599c711183c83da7ce39717a7cbdbce", - "sha256:73da69e1f612c3e682e34dcb971272d90d6f27b2c99acff444ca455a89978574", - "sha256:75c8766734ac0053e1d683567e65e85306c4ec62631b0591caeb287ac8f72e08", - "sha256:75eea40355a8690459c7291ce6c8ce39c27bd223675c7da6619f510c728feb97", - "sha256:80c3cf46511653f94dfe07c7c79ab105c4164d6e1dfcb35b7214fb9af53eaef4", - "sha256:8557c807388e6617161fe51b1a4747ea8d1133f2d2ad8e79583439abebe58fbd", - "sha256:89438e8885a186c69fe31f7ef98bb2bf29688c466c3caf9060f404c0be89ae80", - "sha256:899b03a3be785a7e1ff84b237da71f0efa2f021512f147dd34ffdf7aa82cb678", - "sha256:8de9b88f0cbac73cfed34220d13c57849e62a7099a714b929142425e926d223a", - "sha256:8f4d561f4728f825e3b793a53064b606ca0b6fc264f67d09e54af452aafc5b82", - "sha256:907b214da5d2fcff0b6ddb83de1333890ca92abaf4bbf8d9c61dc1b95c87fd6e", - "sha256:9118de88c16947eaf5b92f749e65b0501ea69e7c2be7bd6aefc12551622360e1", - "sha256:9568764e72d85cf7855ca78b48e07ed1be47bf230e2cea8dabda3c95f660b0ff", - 
"sha256:9c74cbee9e532dc34371127f7686d6953e5153a1f22beab7f953d95ee4a0fe09", - "sha256:9cdfd649011ce2d90cb0dd304c5aba1190fac0c266d19a9e2b96b81cfd150a09", - "sha256:9f00d54b18dd837f1431d66b076737deb7c29ce3ebb8412ceaf44d5e1954ac0c", - "sha256:a2f416cdfe92f5fbb77177f5f3f7830059d1582db05f2c7119bf80069d1ab69b", - "sha256:a4cb372e22e9c879bd9a9cc9b20b7c1fbf30a605ac953da45ecec05d8a6e1c77", - "sha256:a65de5c02884760a14a58304fb6303f9ddfc582e630f385daea871e1bdb18686", - "sha256:aa3b3a43dabc4cc57a7800f526cbe03f71c69121e21b863fdf497b59b462b163", - "sha256:ab0f7aabdbce4a202e013083eeab71afdb85efa405dc4a06fea98cde81204675", - "sha256:abe081453166e206e3a8c6d8ace57214c17b6d9477d7601ac14a365344dbc1f4", - "sha256:ae141c9017f8f473a6ee07a9425da021816a9f8c0683c2e5442f0ccf56b0fc62", - "sha256:af52078719209bef33e38131486fd784832dd8d1dc9b85f00a44f6e7437dd021", - "sha256:b00150a9a3fd0a8efaa90bc2696c105b04039d50763dd1c95a34c88c5966cb57", - "sha256:b2660000e1a113869c86eb5cc07f3343467490f3cd9d0299f81da9ddae7137b7", - "sha256:b3eb1a0d2b6d232d1bcdfc3fcc5f7b004ab3fbd9203011a3172f051d4527c0b6", - "sha256:b589d93a60e78fe55d5bc76ee8c2bf945dbdbb7cd16044c53e0307604e448de1", - "sha256:b8578fc6c8bdd0201327503720fa581000b4bd3934abbf07e2628d1ad3de157d", - "sha256:ba1b28e44f611f3f2b436bd8290050a61db4b59a8e24be4465f44897936b3824", - "sha256:bb44644371eaa29a3aba7b69b1862d0d56f073bb7585baa32e4271a71a91ee82", - "sha256:bcde80aefe7054fad6277762fb7e9d35c72ea479a485ae1bb14629c640987b30", - "sha256:bd1142d22fdb183a0fff66d79134bf644401437fed874f81066d314c67ee193c", - "sha256:bf77f9017fcfa1232f98598a637406e6c33982ccba8a5922339575c3e2b90ea5", - "sha256:c2772bb95062e3f9774140205cd65d8997e39620715486cf5f843cf4ad8f744c", - "sha256:c4ecc4e9a5d73a816cae36ee6b5d8b7a0c72013cae1e101406e832887c3dc2d8", - "sha256:c86231c66e4f422e7c13ea6200bb4048b3016c8bfd11b4fd0dabd04d2c8e3501", - "sha256:c8f6526df47953b07c45b95c4d1da6b9a0861c0e5da0271db96bb1d807825412", - "sha256:ccfb77f6dc8abffa6f1c7e3975ed9070a41ce5fcc11154d2bead8c1baa940f09", 
- "sha256:d9d7efaad48b859053b90dedd69bc92f2095084251e732e4c57ac9726bcb1e64", - "sha256:dd91a7d7a9ce7f4983097c91ce211f3e5569cc21caa16f2692298a07e396f82b", - "sha256:de4a2fd524993578fe093044f291b4b24aab134390030b3b9b5f87fd41ab7e75", - "sha256:df61f818edf7c8626bfa392f825860fb670b5f8336e238eb0ec7e2a5689cdded", - "sha256:e1147bc3d0dd1e549d991110d0a09557ec9f925dbc1ca62871fcdab2ec9d716b", - "sha256:e1954f4b239d1a92081647eecfd51cbfd08ea16eb743b8af1cd0113258feea14", - "sha256:e281b71922208e00886e4b7ffbfcf27874486364f177418ab676f102130e7ec9", - "sha256:e69737bd56006a86fd5a78b2b85447580a6138c930a75eb9ef39fe03d90782b1", - "sha256:e82b4a70cc67094f3f3fd77579702f48fcf1de7bdc67d79b8f1e24d089a6162c", - "sha256:e92e5817eb6bfed23aa5e45bfe30647b83602bdd6f9e25d63524d4e6258458b0", - "sha256:eaba0613c759ebf95988a84f766ca6b7432d55ce399194f95dde588ad1be0878", - "sha256:edd74b760a6bb950397e7a7bd2f38e6700f6525062650b1d77c6d851b82f02c2", - "sha256:f40abbcc0a7d9a8a80870af839d317e6932533f98682aabd977add6c53beeb23", - "sha256:fce7a8ee8d0f682c953c0188735d823f0fcb62779bf92cd6ba473a8e730e26ad" + "sha256:015de2ce2af1586ff5dc873e804434185199a15f7d96920ce67e50604592cae9", + "sha256:061c3ff1f51ecec256e916cf71cc01f9975af8fb3af9b94d3c0cc8702cfea637", + "sha256:08a80cf4884920863623a9ee9a285ee04cef57ebedc1cc87b3e3e0f24c8acfe5", + "sha256:09362f86ec201288d5687d1dc476b07bf39c08478cde837cb710b302864e7ec9", + "sha256:0bb4f48bd0dd18eebe826395e6a48b7331291078a879295bae4e5d053be50d4c", + "sha256:106af1653007cc569d5fbb5f08c6648a49fe4de74c2df814e234e282ebc06957", + "sha256:11fdd1192240dda8d6c5d18a06146e9045cb7e3ba7c06de6973000ff035df7c6", + "sha256:16a472300bc6c83fe4c2072cc22b3972f90d718d56f241adabc7ae509f53f154", + "sha256:176287bb998fd1e9846a9b666e240e58f8d3373e3bf87e7642f15af5405187b8", + "sha256:177914f81f66c86c012311f8c7f46887ec375cfcfd2a2f28233a3053ac93a569", + "sha256:177c9dd834cdf4dc39c27436ade6fdf9fe81484758885f2d616d5d03c0a83bd2", + 
"sha256:187700668c018a7e76e89424b7c1042f317c8df9161f00c0c903c82b0a8cac5c", + "sha256:1d9b5ee46dcb498fa3e46d4dfabcb531e1f2e76b477e0d99ef114f17bbd38453", + "sha256:22da15b902f9f8e267020d1c8bcfc4831ca646fecb60254f7bc71763569f56b1", + "sha256:24cd91a03543a0f8d09cb18d1cb27df80a84b5553d2bd94cba5979ef6af5c6e7", + "sha256:255f1a10ae39b52122cce26ce0781f7a616f502feecce9e616976f6a87992d6b", + "sha256:271c360fdc464fe6a75f13ea0c08ddf71a321f4c55fc20a3fe62ea3ef09df7d9", + "sha256:2ed83d53a8c5902ec48b90b2ac045e28e1698c0bea9441af9409fc844dc79496", + "sha256:2f3e1867dd574014253b4b8f01ba443b9c914e61d45f3674e452a915d6e929a3", + "sha256:35fbd23c1c8732cde7a94abe7fb071ec173c2f58c0bd0d7e5b669fdfc80a2c7b", + "sha256:37d0c59548ae56fae01c14998918d04ee0d5d3277363c10208eef8c4e2b68ed6", + "sha256:39d05e65f23a0fe897b6ac395f2a8d48c56ac0f583f5d663e0afec1da89b95da", + "sha256:3ad59efe24a4d54c2742929001f2d02803aafc15d6d781c21379e3f7f66ec842", + "sha256:3aed39db2f0ace76faa94f465d4234aac72e2f32b009f15da6492a561b3bbebd", + "sha256:3bbac1953c17252f9cc675bb19372444aadf0179b5df575ac4b56faaec9f6294", + "sha256:40bc802a696887b14c002edd43c18082cb7b6f9ee8b838239b03b56574d97f71", + "sha256:42f712b4668831c0cd85e0a5b5a308700fe068e37dcd24c0062904c4e372b093", + "sha256:448a66b8266de0b581246ca7cd6a73b8d98d15100fb7165974535fa3b577340e", + "sha256:485301ee56ce87a51ccb182a4b180d852c5cb2b3cb3a82f7d4714b4141119d8c", + "sha256:485747ee62da83366a44fbba963c5fe017860ad408ccd6cd99aa66ea80d32b2e", + "sha256:4cf0855a842c5b5c391dd32ca273b09e86abf8367572073bd1edfc52bc44446b", + "sha256:4eca20917a06d2fca7628ef3c8b94a8c358f6b43f1a621c9815243462dcccf97", + "sha256:4ed172d0c79f156c1b954e99c03bc2e3033c17efce8dd1a7c781bc4d5793dfac", + "sha256:5267cfda873ad62591b9332fd9472d2409f7cf02a34a9c9cb367e2c0255994bf", + "sha256:52b5cbc0469328e58180021138207e6ec91d7ca2e037d3549cc9e34e2187330a", + "sha256:53d7a3cd46cdc1689296348cb05ffd4f4280035770aee0c8ead3bbd4d6529acc", + "sha256:563646d74a4b4456d0cf3b714ca522e725243c603e8254ad85c3b59b7c0c4bf0", 
+ "sha256:570cc326e78ff23dec7f41487aa9c3dffd02e5ee9ab43a8f6ccc3df8f9327623", + "sha256:5aca759ada6b1967fcfd4336dcf460d02a8a23e6abe06e90ea7881e5c22c4de6", + "sha256:5de11c041486681ce854c814844f4ce3282b6ea1656faae19208ebe09d31c5b8", + "sha256:5e271dd97c7bb8eefda5cca38cd0b0373a1fea50f71e8071376b46968582af9b", + "sha256:642ed0a209ced4be3a46f8cb094f2d76f1f479e2a1ceca6de6346a096cd3409d", + "sha256:6446002739ca29249f0beaaf067fcbc2b5aab4bc7ee8fb941bd194947ce19aff", + "sha256:691d50c99a937709ac4c4cd570d959a006bd6a6d970a484c84cc99543d4a5bbb", + "sha256:69b857a7d8bd4f5d6e0db4086da8c46309a26e8cefdfc778c0c5cc17d4b11e08", + "sha256:6ac3fefb0d168c7c6cab24fdfc80ec62cd2b4dfd9e65b84bdceb1cb01d385c33", + "sha256:6c9141af27a4e5819d74d67d227d5047a20fa3c7d4d9df43037a955b4c748ec5", + "sha256:7170cbde4070dc3c77dec82abf86f3b210633d4f89550fa0ad2d4b549a05572a", + "sha256:763ad59e105fca09705d9f9b29ecffb95ecdc3b0363be3bb56081b2c6de7977a", + "sha256:77076bdc8776a2b029e1e6ffbe6d7056e35f56f5e80d9dc0bad26ad4a024a762", + "sha256:7cd020b1fb41e3ab7716d4d2c3972d4588fdfbab9bfbbb64acc7078eccef8860", + "sha256:821392559d37759caa67d622d0d2994c7a3f2fb29274948ac799d496d92bca73", + "sha256:829e91f3a8574888b73e7a3feb3b1af698e717513597e23136ff4eba0bc8387a", + "sha256:850c272e0e0d1a5c5d73b1b7871b0a7c2446b304cec55ccdb3eaac0d792bb065", + "sha256:87d9b206b1bd7a0523375dc2020a6ce88bca5330682ae2fe25e86fd5d45cea9c", + "sha256:8bd01ff4032abaed03f2db702fa9a61078bee37add0bd884a6190b05e63b028c", + "sha256:8d54bbdf5d56e2c8cf81a1857250f3ea132de77af543d0ba5dce667183b61fec", + "sha256:8efaeb08ede95066da3a3e3c420fcc0a21693fcd0c4396d0585b019613d28515", + "sha256:8f94fdd756ba1f79f988855d948ae0bad9ddf44df296770d9a58c774cfbcca72", + "sha256:95cde244e7195b2c07ec9b73fa4c5026d4a27233451485caa1cd0c1b55f26dbd", + "sha256:975382d9aa90dc59253d6a83a5ca72e07f4ada3ae3d6c0575ced513db322b8ec", + "sha256:9dd9d9d9e898b9d30683bdd2b6c1849449158647d1049a125879cb397ee9cd12", + 
"sha256:a019a344312d0b1f429c00d49c3be62fa273d4a1094e1b224f403716b6d03be1", + "sha256:a4d9bfda3f84fc563868fe25ca160c8ff0e69bc4443c5647f960d59400ce6557", + "sha256:a657250807b6efd19b28f5922520ae002a54cb43c2401e6f3d0230c352564d25", + "sha256:a771417c9c06c56c9d53d11a5b084d1de75de82978e23c544270ab25e7c066ff", + "sha256:aad6ed9e70ddfb34d849b761fb243be58c735be6a9265b9060d6ddb77751e3e8", + "sha256:ae87137951bb3dc08c7d8bfb8988d8c119f3230731b08a71146e84aaa919a7a9", + "sha256:af247fd4f12cca4129c1b82090244ea5a9d5bb089e9a82feb5a2f7c6a9fe181d", + "sha256:b5d4bdd697195f3876d134101c40c7d06d46c6ab25159ed5cbd44105c715278a", + "sha256:b9255e7165083de7c1d605e818025e8860636348f34a79d84ec533546064f07e", + "sha256:c22211c165166de6683de8136229721f3d5c8606cc2c3d1562da9a3a5058049c", + "sha256:c55f9821f88e8bee4b7a72c82cfb5ecd22b6aad04033334f33c329b29bfa4da0", + "sha256:c7aed97f2e676561416c927b063802c8a6285e9b55e1b83213dfd99a8f4f9e48", + "sha256:cd2163f42868865597d89399a01aa33b7594ce8e2c4a28503127c81a2f17784e", + "sha256:ce5e7504db95b76fc89055c7f41e367eaadef5b1d059e27e1d6eabf2b55ca314", + "sha256:cff7351c251c7546407827b6a37bcef6416304fc54d12d44dbfecbb717064717", + "sha256:d27aa6bbc1f33be920bb7adbb95581452cdf23005d5611b29a12bb6a3468cc95", + "sha256:d3b52a67ac66a3a64a7e710ba629f62d1e26ca0504c29ee8cbd99b97df7079a8", + "sha256:de61e424062173b4f70eec07e12469edde7e17fa180019a2a0d75c13a5c5dc57", + "sha256:e10e6a1ed2b8661201e79dff5531f8ad4cdd83548a0f81c95cf79b3184b20c33", + "sha256:e1a0ffc39f51aa5f5c22114a8f1906b3c17eba68c5babb86c5f77d8b1bba14d1", + "sha256:e22491d25f97199fc3581ad8dd8ce198d8c8fdb8dae80dea3512e1ce6d5fa99f", + "sha256:e626b864725680cd3904414d72e7b0bd81c0e5b2b53a5b30b4273034253bb41f", + "sha256:e8c71ea77536149e36c4c784f6d420ffd20bea041e3ba21ed021cb40ce58e2c9", + "sha256:e8d0f0eca087630d58b8c662085529781fd5dc80f0a54eda42d5c9029f812599", + "sha256:ea65b59882d5fa8c74a23f8960db579e5e341534934f43f3b18ec1839b893e41", + "sha256:ea93163472db26ac6043e8f7f93a05d9b59e0505c760da2a3cd22c7dd7111391", 
+ "sha256:eab75a8569a095f2ad470b342f2751d9902f7944704f0571c8af46bede438475", + "sha256:ed8313809571a5463fd7db43aaca68ecb43ca7a58f5b23b6e6c6c5d02bdc7882", + "sha256:ef5fddfb264e89c435be4adb3953cef5d2936fdeb4463b4161a6ba2f22e7b740", + "sha256:ef750a20de1b65657a1425f77c525b0183eac63fe7b8f5ac0dd16f3668d3e64f", + "sha256:efb9ece97e696bb56e31166a9dd7919f8f0c6b31967b454718c6509f29ef6fee", + "sha256:f4c179a7aeae10ddf44c6bac87938134c1379c49c884529f090f9bf05566c836", + "sha256:f602881d80ee4228a2355c68da6b296a296cd22bbb91e5418d54577bbf17fa7c", + "sha256:fc2200e79d75b5238c8d69f6a30f8284290c777039d331e7340b6c17cad24a5a", + "sha256:fcc1ebb7561a3e24a6588f7c6ded15d80aec22c66a070c757559b57b17ffd1cb" ], "markers": "python_version >= '3.8'", - "version": "==0.10.2" + "version": "==0.10.3" }, "setproctitle": { "hashes": [ @@ -1894,28 +1846,20 @@ "markers": "python_version >= '3.7'", "version": "==2.3.0" }, - "tomli": { - "hashes": [ - "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc", - "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f" - ], - "markers": "python_version < '3.11'", - "version": "==2.0.1" - }, "traitlets": { "hashes": [ - "sha256:9e6ec080259b9a5940c797d58b613b5e31441c2257b87c2e795c5228ae80d2d8", - "sha256:f6cde21a9c68cf756af02035f72d5a723bf607e862e7be33ece505abf4a3bad9" + "sha256:417745a96681fbb358e723d5346a547521f36e9bd0d50ba7ab368fff5d67aa54", + "sha256:f584ea209240466e66e91f3c81aa7d004ba4cf794990b0c775938a1544217cd1" ], - "markers": "python_version >= '3.7'", - "version": "==5.9.0" + "markers": "python_version >= '3.8'", + "version": "==5.10.0" }, "typing-extensions": { "hashes": [ "sha256:440d5dd3af93b060174bf433bccd69b0babc3b15b1a8dca43789fd7f61514b36", "sha256:b75ddc264f0ba5615db7ba217daeb99701ad295353c45f9e95963337ceeeffb2" ], - "markers": "python_version < '3.11'", + "markers": "python_version >= '3.7'", "version": "==4.7.1" }, "wcwidth": { diff --git a/api/api/controllers/search_controller.py 
b/api/api/controllers/search_controller.py index a723e52f9d6..e90061c9b7e 100644 --- a/api/api/controllers/search_controller.py +++ b/api/api/controllers/search_controller.py @@ -486,9 +486,13 @@ def search( # check things like provider density for a set of queries. tallies.count_provider_occurrences(results_to_tally, index) - search_context = SearchContext.build(results, origin_index) + if not results: + results = [] + + result_ids = [result.identifier for result in results] + search_context = SearchContext.build(result_ids, origin_index) - return results or [], page_count, result_count, search_context.asdict() + return results, page_count, result_count, search_context.asdict() def related_media(uuid, index, filter_dead): @@ -522,8 +526,12 @@ def related_media(uuid, index, filter_dead): result_count, _ = _get_result_and_page_count(response, results, page_size, page) - search_context = SearchContext.build(results, index) - return results or [], result_count, search_context.asdict() + if not results: + results = [] + + result_ids = [result.identifier for result in results] + search_context = SearchContext.build(result_ids, index) + return results, result_count, search_context.asdict() def get_sources(index): diff --git a/api/api/migrations/0052_relational_fields.py b/api/api/migrations/0052_relational_fields.py index 248e90cc934..f1f46fb9e8d 100644 --- a/api/api/migrations/0052_relational_fields.py +++ b/api/api/migrations/0052_relational_fields.py @@ -39,12 +39,12 @@ class Migration(migrations.Migration): migrations.AlterField( model_name='matureaudio', name='identifier', - field=models.OneToOneField(db_column="identifier", db_constraint=False, help_text='The reference to the mature audio.', on_delete=django.db.models.deletion.DO_NOTHING, primary_key=True, related_name='mature_audio', serialize=False, to='api.audio', to_field='identifier'), + field=models.OneToOneField(db_column="identifier", db_constraint=False, help_text='The reference to the sensitive audio.', 
on_delete=django.db.models.deletion.DO_NOTHING, primary_key=True, related_name='mature_audio', serialize=False, to='api.audio', to_field='identifier'), ), migrations.AlterField( model_name='matureimage', name='identifier', - field=models.OneToOneField(db_column="identifier", db_constraint=False, help_text='The reference to the mature image.', on_delete=django.db.models.deletion.DO_NOTHING, primary_key=True, related_name='mature_image', serialize=False, to='api.image', to_field='identifier'), + field=models.OneToOneField(db_column="identifier", db_constraint=False, help_text='The reference to the sensitive image.', on_delete=django.db.models.deletion.DO_NOTHING, primary_key=True, related_name='mature_image', serialize=False, to='api.image', to_field='identifier'), ), migrations.RenameField( model_name="audioreport", diff --git a/api/api/models/audio.py b/api/api/models/audio.py index 906651aaa8c..377fc8d716f 100644 --- a/api/api/models/audio.py +++ b/api/api/models/audio.py @@ -279,7 +279,7 @@ class MatureAudio(AbstractMatureMedia): db_constraint=False, db_column="identifier", related_name="mature_audio", - help_text="The reference to the mature audio.", + help_text="The reference to the sensitive audio.", ) class Meta: diff --git a/api/api/models/media.py b/api/api/models/media.py index 5cd795f1323..187e0c6817f 100644 --- a/api/api/models/media.py +++ b/api/api/models/media.py @@ -6,7 +6,7 @@ from django.db import models from django.utils.html import format_html -from elasticsearch import Elasticsearch, TransportError +from elasticsearch import Elasticsearch, NotFoundError from api.models.base import OpenLedgerModel from api.models.mixins import ForeignIdentifierMixin, IdentifierMixin, MediaMixin @@ -275,16 +275,14 @@ def _perform_index_update(self, method: str, raise_errors: bool, **es_method_arg refresh=True, **es_method_args, ) - except TransportError as e: - if e.status_code == 404: - # This is expected for the filtered index, but we should still - # log, just 
in case. - logger.warning( - f"Document with _id {document_id} not found " - f"in {index} index. No update performed." - ) - else: - raise e + except NotFoundError: + # This is expected for the filtered index, but we should still + # log, just in case. + logger.warning( + f"Document with _id {document_id} not found " + f"in {index} index. No update performed." + ) + continue class AbstractDeletedMedia(PerformIndexUpdateMixin, OpenLedgerModel): @@ -353,7 +351,7 @@ class AbstractMatureMedia(PerformIndexUpdateMixin, models.Model): db_constraint=False, db_column="identifier", related_name="mature_abstract_media", - help_text="The reference to the mature media.", + help_text="The reference to the sensitive media.", ) """ Sub-classes must override this field to point to a concrete sub-class of diff --git a/api/api/serializers/media_serializers.py b/api/api/serializers/media_serializers.py index 9da1b411ba3..c56d394bb83 100644 --- a/api/api/serializers/media_serializers.py +++ b/api/api/serializers/media_serializers.py @@ -122,7 +122,7 @@ class MediaSearchRequestSerializer(serializers.Serializer): label="mature", default=False, required=False, - help_text="Whether to include content for mature audiences.", + help_text="Whether to include sensitive content.", ) # The ``unstable__`` prefix is used in the query params. @@ -365,6 +365,17 @@ class Meta: fields = ["identifier", "reason", "description"] read_only_fields = ["identifier"] + def to_internal_value(self, data): + """ + Map data before validation. + + See ``MediaReportRequestSerializer::_map_reason`` docstring for + further explanation. 
+ """ + + data["reason"] = self._map_reason(data["reason"]) + return super().to_internal_value(data) + def validate(self, attrs): if ( attrs["reason"] == "other" @@ -373,8 +384,37 @@ def validate(self, attrs): raise serializers.ValidationError( "Description must be at least be 20 characters long" ) + return attrs + def _map_reason(self, value): + """ + Map `sensitive` to `mature` for forwards compatibility. + + This is an interim implementation until the API is updated + to use the new "sensitive" terminology. + + Once the API is updated to use "sensitive" as the designator + rather than the current "mature" term, this function should + be updated to reverse the mapping, that is, map `mature` to + `sensitive`, for backwards compatibility. + + Note: This cannot be implemented as a simpler `validate_reason` method + on the serializer because field validation runs _before_ validators + declared on the serializer. This means the choice field's validation + will complain about `reason` set to the incorrect value before we have + a chance to map it to the correct value. + + This could be mitigated by adding all values, current, future, and + deprecated, to the model field. However, that requires a migration + each time we make that change, and would send an incorrect message + about our data expectations. It's cleaner and more consistent to map + the data up-front, at serialization time, to prevent any confusion at + the data model level. 
+ """ + + return "mature" if value == "sensitive" else value + ######################## # Response serializers # diff --git a/api/api/utils/search_context.py b/api/api/utils/search_context.py index 09cb1a7b766..d1505c476d8 100644 --- a/api/api/utils/search_context.py +++ b/api/api/utils/search_context.py @@ -4,7 +4,6 @@ from django.conf import settings from elasticsearch_dsl import Q, Search -from elasticsearch_dsl.response import Hit from api.constants.media_types import OriginIndex @@ -15,18 +14,18 @@ class SearchContext: # to convey that it is the Openverse result identifier and # not the document _id - all_result_identifiers: set[str] + all_result_identifiers: list[str] """All the result identifiers gathered for the search.""" sensitive_text_result_identifiers: set[str] """Subset of result identifiers for results with sensitive textual content.""" @classmethod - def build(cls, results: list[Hit], origin_index: OriginIndex) -> Self: - if not results: - return cls(set(), set()) - - all_result_identifiers = {r.identifier for r in results} + def build( + cls, all_result_identifiers: list[str], origin_index: OriginIndex + ) -> Self: + if not all_result_identifiers: + return cls(list(), set()) if not settings.ENABLE_FILTERED_INDEX_QUERIES: return cls(all_result_identifiers, set()) @@ -41,14 +40,16 @@ def build(cls, results: list[Hit], origin_index: OriginIndex) -> Self: # cf: https://github.com/WordPress/openverse/issues/2154 Q( "terms", - **{"identifier.keyword": [result.identifier for result in results]}, + **{"identifier.keyword": all_result_identifiers}, ) ) # The default query size is 10, so we need to slice the query # to change the size to be big enough to encompass all the # results. 
- results_in_filtered_index = filtered_index_search[: len(results)].execute() + results_in_filtered_index = filtered_index_search[ + : len(all_result_identifiers) + ].execute() filtered_index_identifiers = { result.identifier for result in results_in_filtered_index } diff --git a/api/api/views/media_views.py b/api/api/views/media_views.py index 0ad9cd1e4f7..3ee2414ac73 100644 --- a/api/api/views/media_views.py +++ b/api/api/views/media_views.py @@ -12,6 +12,7 @@ from api.serializers.provider_serializers import ProviderSerializer from api.utils import image_proxy from api.utils.pagination import StandardPagination +from api.utils.search_context import SearchContext logger = logging.getLogger(__name__) @@ -87,6 +88,16 @@ def get_db_results(self, results): # Standard actions + def retrieve(self, request, *_, **__): + instance = self.get_object() + search_context = SearchContext.build( + [str(instance.identifier)], self.default_index + ).asdict() + serializer_context = search_context | self.get_serializer_context() + serializer = self.get_serializer(instance, context=serializer_context) + + return Response(serializer.data) + def list(self, request, *_, **__): params = self._get_request_serializer(request) diff --git a/api/conf/settings/base.py b/api/conf/settings/base.py index 903b1fc5092..74238568971 100644 --- a/api/conf/settings/base.py +++ b/api/conf/settings/base.py @@ -12,8 +12,6 @@ "django.contrib.sessions", "django.contrib.messages", "django.contrib.staticfiles", - # Third-party installed apps, more can be added in other settings files. 
- "sslserver", ] MIDDLEWARE = [ diff --git a/api/conf/settings/elasticsearch.py b/api/conf/settings/elasticsearch.py index 4f8a6cfb65c..c5a8f48b331 100644 --- a/api/conf/settings/elasticsearch.py +++ b/api/conf/settings/elasticsearch.py @@ -1,55 +1,44 @@ """This file contains configuration pertaining to Elasticsearch.""" -from aws_requests_auth.aws_auth import AWSRequestsAuth from decouple import config -from elasticsearch import Elasticsearch, RequestsHttpConnection +from elasticsearch import Elasticsearch from elasticsearch_dsl import connections from api.constants.media_types import MEDIA_TYPES -from conf.settings.aws import AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY -def _elasticsearch_connect(): +def _elasticsearch_connect() -> tuple[Elasticsearch, str]: """ Connect to configured Elasticsearch domain. :return: An Elasticsearch connection object. """ + es_scheme = config("ELASTICSEARCH_SCHEME", default="http://") es_url = config("ELASTICSEARCH_URL", default="localhost") es_port = config("ELASTICSEARCH_PORT", default=9200, cast=int) - es_aws_region = config("ELASTICSEARCH_AWS_REGION", default="us-east-1") - - auth = AWSRequestsAuth( - aws_access_key=AWS_ACCESS_KEY_ID, - aws_secret_access_key=AWS_SECRET_ACCESS_KEY, - aws_host=es_url, - aws_region=es_aws_region, - aws_service="es", - ) - auth.encode = lambda x: bytes(x.encode("utf-8")) + + es_endpoint = f"{es_scheme}{es_url}:{es_port}" + _es = Elasticsearch( - host=es_url, - port=es_port, - connection_class=RequestsHttpConnection, - timeout=10, + es_endpoint, + request_timeout=10, max_retries=1, retry_on_timeout=True, - http_auth=auth, - wait_for_status="yellow", ) _es.info() - return _es + _es.cluster.health(wait_for_status="yellow") + return _es, es_endpoint SETUP_ES = config("SETUP_ES", default=True, cast=bool) if SETUP_ES: - ES = _elasticsearch_connect() + ES, ES_ENDPOINT = _elasticsearch_connect() #: Elasticsearch client, also aliased to connection 'default' connections.add_connection("default", ES) else: - ES 
= None + ES, ES_ENDPOINT = None, None MEDIA_INDEX_MAPPING = { media_type: config(f"{media_type.upper()}_INDEX_NAME", default=media_type) diff --git a/api/test/factory/models/__init__.py b/api/test/factory/models/__init__.py index 7cdf1308571..628c8d484eb 100644 --- a/api/test/factory/models/__init__.py +++ b/api/test/factory/models/__init__.py @@ -1,9 +1,14 @@ from test.factory.models.audio import ( AudioAddOnFactory, AudioFactory, + AudioReportFactory, MatureAudioFactory, ) -from test.factory.models.image import ImageFactory, MatureImageFactory +from test.factory.models.image import ( + ImageFactory, + ImageReportFactory, + MatureImageFactory, +) from test.factory.models.oauth2 import ( AccessTokenFactory, OAuth2RegistrationFactory, diff --git a/api/test/factory/models/audio.py b/api/test/factory/models/audio.py index c6f88cc820a..9735b2c2c1a 100644 --- a/api/test/factory/models/audio.py +++ b/api/test/factory/models/audio.py @@ -4,7 +4,7 @@ import factory from factory.django import DjangoModelFactory -from api.models.audio import Audio, AudioAddOn, MatureAudio +from api.models.audio import Audio, AudioAddOn, AudioReport, MatureAudio class MatureAudioFactory(DjangoModelFactory): @@ -28,3 +28,10 @@ class Meta: audio_identifier = IdentifierFactory(AudioFactory) waveform_peaks = Faker("waveform") + + +class AudioReportFactory(DjangoModelFactory): + class Meta: + model = AudioReport + + media_obj = factory.SubFactory(AudioFactory) diff --git a/api/test/factory/models/image.py b/api/test/factory/models/image.py index 025f8acdf39..dc034ed3db0 100644 --- a/api/test/factory/models/image.py +++ b/api/test/factory/models/image.py @@ -1,9 +1,9 @@ -from test.factory.models.media import MediaFactory +from test.factory.models.media import MediaFactory, MediaReportFactory import factory from factory.django import DjangoModelFactory -from api.models.image import Image, MatureImage +from api.models.image import Image, ImageReport, MatureImage class 
MatureImageFactory(DjangoModelFactory): @@ -18,3 +18,10 @@ class ImageFactory(MediaFactory): class Meta: model = Image + + +class ImageReportFactory(MediaReportFactory): + class Meta: + model = ImageReport + + media_obj = factory.SubFactory(ImageFactory) diff --git a/api/test/factory/models/media.py b/api/test/factory/models/media.py index 8fc6505d602..2df54763b8d 100644 --- a/api/test/factory/models/media.py +++ b/api/test/factory/models/media.py @@ -4,6 +4,7 @@ from django.conf import settings import factory +import pook from elasticsearch import Elasticsearch from elasticsearch_dsl.response import Hit from factory.django import DjangoModelFactory @@ -95,6 +96,12 @@ def create(cls, *args, **kwargs) -> AbstractMedia | tuple[AbstractMedia, Hit]: skip_es = kwargs.pop("skip_es", False) with_hit = kwargs.pop("with_hit", False) + pook_active = pook.isactive() + if pook_active: + # Temporarily disable pook so that the calls to ES to create + # the factory document don't fail + pook.disable() + model_class = cls._meta.get_model_class() if cls._highest_pre_existing_pk is None: response = settings.ES.search( @@ -121,6 +128,10 @@ def create(cls, *args, **kwargs) -> AbstractMedia | tuple[AbstractMedia, Hit]: if mature_reported: cls._mature_factory.create(media_obj=model) + if pook_active: + # Reactivate pook if it was active + pook.activate() + if with_hit: return model, hit @@ -154,6 +165,7 @@ def _save_model_to_es( origin_index = media._meta.db_table source_document = cls._create_es_source_document(media, mature) + es.create( index=origin_index, id=str(media.pk), @@ -190,3 +202,8 @@ class IdentifierFactory(factory.SubFactory): def evaluate(self, instance, step, extra): model = super().evaluate(instance, step, extra) return model.identifier + + +class MediaReportFactory(DjangoModelFactory): + class Meta: + abstract = True diff --git a/api/test/media_integration.py b/api/test/media_integration.py index ad31182ec97..952353425d2 100644 --- a/api/test/media_integration.py +++ 
b/api/test/media_integration.py @@ -153,3 +153,28 @@ def related(fixture): related_url = fixture["results"][0]["related_url"] response = requests.get(related_url) assert response.status_code == 200 + + +def sensitive_search_and_detail(media_type): + search_res = requests.get( + f"{API_URL}/v1/{media_type}/", + params={"q": "bird", "unstable__include_sensitive_results": "true"}, + verify=False, + ) + results = search_res.json()["results"] + + sensitive_result = None + sensitivities = [] + for result in results: + if sensitivities := result["unstable__sensitivity"]: + sensitive_result = result + break + assert sensitive_result is not None + assert len(sensitivities) != 0 + + detail_res = requests.get( + f"{API_URL}/v1/{media_type}/{sensitive_result['id']}", verify=False + ) + details = detail_res.json() + + assert sensitivities == details["unstable__sensitivity"] diff --git a/api/test/test_audio_integration.py b/api/test/test_audio_integration.py index a1ecbdb40f0..9c4aa0210da 100644 --- a/api/test/test_audio_integration.py +++ b/api/test/test_audio_integration.py @@ -20,6 +20,7 @@ search_quotes_exact, search_source_and_excluded, search_special_chars, + sensitive_search_and_detail, stats, uuid_validation, ) @@ -157,3 +158,7 @@ def test_audio_uuid_validation(): def test_audio_related(audio_fixture): related(audio_fixture) + + +def test_audio_sensitive_search_and_detail(): + sensitive_search_and_detail("audio") diff --git a/api/test/test_image_integration.py b/api/test/test_image_integration.py index c6de939f0ba..2ed9d9b0bc3 100644 --- a/api/test/test_image_integration.py +++ b/api/test/test_image_integration.py @@ -19,6 +19,7 @@ search_quotes_exact, search_source_and_excluded, search_special_chars, + sensitive_search_and_detail, stats, uuid_validation, ) @@ -136,3 +137,7 @@ def test_image_uuid_validation(): def test_image_related(image_fixture): related(image_fixture) + + +def test_audio_sensitive_search_and_detail(): + sensitive_search_and_detail("images") diff --git 
a/api/test/unit/conftest.py b/api/test/unit/conftest.py index f838fb61961..5dd314db2ed 100644 --- a/api/test/unit/conftest.py +++ b/api/test/unit/conftest.py @@ -1,23 +1,40 @@ from dataclasses import dataclass from test.factory import models as model_factories -from test.factory.models.media import CREATED_BY_FIXTURE_MARKER, MediaFactory +from test.factory.models.media import ( + CREATED_BY_FIXTURE_MARKER, + MediaFactory, + MediaReportFactory, +) from unittest.mock import MagicMock from rest_framework.test import APIClient, APIRequestFactory +import pook import pytest from elasticsearch import Elasticsearch from fakeredis import FakeRedis +from api.models import ( + Audio, + DeletedAudio, + DeletedImage, + Image, + MatureAudio, + MatureImage, +) +from api.models.media import AbstractDeletedMedia, AbstractMatureMedia, AbstractMedia from api.serializers.audio_serializers import ( + AudioReportRequestSerializer, AudioSearchRequestSerializer, AudioSerializer, ) from api.serializers.image_serializers import ( + ImageReportRequestSerializer, ImageSearchRequestSerializer, ImageSerializer, ) from api.serializers.media_serializers import ( + MediaReportRequestSerializer, MediaSearchRequestSerializer, MediaSerializer, ) @@ -63,9 +80,18 @@ class MediaTypeConfig: origin_index: str filtered_index: str model_factory: MediaFactory + model_class: AbstractMedia mature_factory: MediaFactory + mature_class: AbstractMatureMedia search_request_serializer: MediaSearchRequestSerializer model_serializer: MediaSerializer + report_serializer: MediaReportRequestSerializer + report_factory: MediaReportFactory + deleted_class: AbstractDeletedMedia + + @property + def indexes(self): + return (self.origin_index, self.filtered_index) MEDIA_TYPE_CONFIGS = { @@ -75,9 +101,14 @@ class MediaTypeConfig: origin_index="image", filtered_index="image-filtered", model_factory=model_factories.ImageFactory, + model_class=Image, mature_factory=model_factories.MatureImageFactory, 
search_request_serializer=ImageSearchRequestSerializer, model_serializer=ImageSerializer, + report_serializer=ImageReportRequestSerializer, + report_factory=model_factories.ImageReportFactory, + mature_class=MatureImage, + deleted_class=DeletedImage, ), "audio": MediaTypeConfig( media_type="audio", @@ -85,9 +116,14 @@ class MediaTypeConfig: origin_index="audio", filtered_index="audio-filtered", model_factory=model_factories.AudioFactory, + model_class=Audio, mature_factory=model_factories.MatureAudioFactory, search_request_serializer=AudioSearchRequestSerializer, model_serializer=AudioSerializer, + report_serializer=AudioReportRequestSerializer, + report_factory=model_factories.AudioReportFactory, + mature_class=MatureAudio, + deleted_class=DeletedAudio, ), } @@ -123,8 +159,16 @@ def cleanup_elasticsearch_test_documents(request, settings): es: Elasticsearch = settings.ES + # If pook was activated by a test and not deactivated + # (usually because the test failed and something prevented + # pook from cleaning up after itself), disable here so that + # the ES request on the next line doesn't get intercepted, + # causing pook to raise an exception about the request not + # matching and the fixture documents not getting cleaned. + pook.disable() + es.delete_by_query( index="*", - body={"query": {"match": {"tags.name": CREATED_BY_FIXTURE_MARKER}}}, + query={"match": {"tags.name": CREATED_BY_FIXTURE_MARKER}}, refresh=True, ) diff --git a/api/test/unit/controllers/test_search_controller.py b/api/test/unit/controllers/test_search_controller.py index 3ed985456d6..e802be4920e 100644 --- a/api/test/unit/controllers/test_search_controller.py +++ b/api/test/unit/controllers/test_search_controller.py @@ -576,15 +576,19 @@ def test_no_post_process_results_recursion( hit_count=hit_count, ) - es_host = settings.ES.transport.kwargs["host"] - es_port = settings.ES.transport.kwargs["port"] - # `origin_index` enforced by passing `exact_index=True` below. 
es_endpoint = ( - f"http://{es_host}:{es_port}/{image_media_type_config.origin_index}/_search" + f"{settings.ES_ENDPOINT}/{image_media_type_config.origin_index}/_search" ) - mock_search = pook.post(es_endpoint).times(1).reply(200).json(mock_es_response).mock + mock_search = ( + pook.post(es_endpoint) + .times(1) + .reply(200) + .header("x-elastic-product", "Elasticsearch") + .json(mock_es_response) + .mock + ) # Ensure dead link filtering does not remove any results pook.head( @@ -682,12 +686,9 @@ def test_post_process_results_recurses_as_needed( base_hits=mock_es_response_1["hits"]["hits"], ) - es_host = settings.ES.transport.kwargs["host"] - es_port = settings.ES.transport.kwargs["port"] - # `origin_index` enforced by passing `exact_index=True` below. es_endpoint = ( - f"http://{es_host}:{es_port}/{image_media_type_config.origin_index}/_search" + f"{settings.ES_ENDPOINT}/{image_media_type_config.origin_index}/_search" ) # `from` is always 0 if there is no query mask @@ -703,6 +704,7 @@ def test_post_process_results_recurses_as_needed( .body(re.compile('from":0')) .times(1) .reply(200) + .header("x-elastic-product", "Elasticsearch") .json(mock_es_response_1) .mock ) @@ -714,6 +716,7 @@ def test_post_process_results_recurses_as_needed( .body(re.compile('from":0')) .times(1) .reply(200) + .header("x-elastic-product", "Elasticsearch") .json(mock_es_response_2) .mock ) diff --git a/api/test/unit/models/test_media_report.py b/api/test/unit/models/test_media_report.py index 9ca4fb6706c..0433647015a 100644 --- a/api/test/unit/models/test_media_report.py +++ b/api/test/unit/models/test_media_report.py @@ -1,24 +1,13 @@ import uuid -from test.factory.models.audio import AudioFactory -from test.factory.models.image import ImageFactory from typing import Literal, Union -from unittest.mock import MagicMock, call, patch from django.core.exceptions import ObjectDoesNotExist +import pook import pytest -from elasticsearch import TransportError - -from api.models import ( - Audio, 
- AudioReport, - DeletedAudio, - DeletedImage, - Image, - ImageReport, - MatureAudio, - MatureImage, -) +from elasticsearch import BadRequestError, NotFoundError + +from api.models import DeletedAudio, DeletedImage, MatureAudio, MatureImage from api.models.media import ( DEINDEXED, DMCA, @@ -38,153 +27,106 @@ reason_params = pytest.mark.parametrize("reason", [DMCA, MATURE, OTHER]) -@pytest.mark.parametrize( - "media_type, report_class", [("image", ImageReport), ("audio", AudioReport)] -) @reason_params -def test_cannot_report_invalid_identifier(media_type, report_class, reason): +def test_cannot_report_invalid_identifier(media_type_config, reason): with pytest.raises(ObjectDoesNotExist): - report_class.objects.create( + media_type_config.report_factory.create( media_obj_id=uuid.uuid4(), reason=reason, ) -@pytest.mark.parametrize( - "media_type, report_class, mature_class, deleted_class, model_factory", - [ - ("image", ImageReport, MatureImage, DeletedImage, ImageFactory), - ("audio", AudioReport, MatureAudio, DeletedAudio, AudioFactory), - ], -) @reason_params def test_pending_reports_have_no_subreport_models( - media_type: MediaType, - report_class, - mature_class, - deleted_class, + media_type_config, reason, - model_factory, ): - media = model_factory.create() - report = report_class.objects.create(media_obj=media, reason=reason) + media = media_type_config.model_factory.create() + report = media_type_config.report_factory.create(media_obj=media, reason=reason) assert report.status == PENDING - assert not mature_class.objects.filter(media_obj=media).exists() - assert not deleted_class.objects.filter(media_obj=media).exists() + assert not media_type_config.mature_class.objects.filter(media_obj=media).exists() + assert not media_type_config.deleted_class.objects.filter(media_obj=media).exists() -@pytest.mark.parametrize( - "media_type, report_class, mature_class, model_factory", - [ - ("image", ImageReport, MatureImage, ImageFactory), - ("audio", AudioReport, 
MatureAudio, AudioFactory), - ], -) -def test_mature_filtering_creates_mature_image_instance( - media_type: MediaType, report_class, mature_class, model_factory -): - media = model_factory.create() - mock_es = MagicMock() - with patch("django.conf.settings.ES", mock_es): - report_class.objects.create( - media_obj=media, reason=MATURE, status=MATURE_FILTERED - ) +def test_mature_filtering_creates_mature_image_instance(media_type_config, settings): + media = media_type_config.model_factory.create() - assert mature_class.objects.filter(media_obj=media).exists() - mock_es.update.assert_has_calls( - [ - call( - id=media.id, - index=media_type, - doc={"mature": True}, - refresh=True, - ), - call( - id=media.id, - index=f"{media_type}-filtered", - doc={"mature": True}, - refresh=True, - ), - ] + media_type_config.report_factory.create( + media_obj=media, reason=MATURE, status=MATURE_FILTERED ) - assert media.mature + assert media_type_config.mature_class.objects.filter(media_obj=media).exists() -@pytest.mark.parametrize( - "media_type, report_class, mature_class, model_factory", - [ - ("image", ImageReport, MatureImage, ImageFactory), - ("audio", AudioReport, MatureAudio, AudioFactory), - ], -) -def test_deleting_mature_image_instance_resets_mature_flag( - media_type: MediaType, report_class, mature_class, model_factory -): - media = model_factory.create() - mock_es = MagicMock() - with patch("django.conf.settings.ES", mock_es): - # Mark as mature. - report_class.objects.create( - media_obj=media, reason=MATURE, status=MATURE_FILTERED + for index in media_type_config.indexes: + doc = settings.ES.get( + index=index, + id=media.pk, + # get defaults to "realtime", meaning it ignores refreshes + # disable it here to implicitly test that the index was refreshed + # when the document was updated + realtime=False, ) - # Delete mature instance. 
- mature_class.objects.get(media_obj=media).delete() - - mock_es.update.assert_has_calls( - [ - call( - id=media.pk, - refresh=True, - index=media_type, - doc={"mature": True}, - ), - call( - id=media.pk, - refresh=True, - index=f"{media_type}-filtered", - doc={"mature": True}, - ), - call( - id=media.pk, - refresh=True, - index=media_type, - doc={"mature": False}, - ), - call( - id=media.pk, - refresh=True, - index=f"{media_type}-filtered", - doc={"mature": False}, - ), - ], + assert doc["found"] + assert doc["_source"]["mature"] + + assert media.mature + + +def test_deleting_mature_image_instance_resets_mature_flag(media_type_config, settings): + media = media_type_config.model_factory.create() + # Mark as mature. + media_type_config.report_factory.create( + media_obj=media, reason=MATURE, status=MATURE_FILTERED ) + # Delete mature instance. + media_type_config.mature_class.objects.get(media_obj=media).delete() + + # Assert the media are back to mature=False + # The previous test asserts they get set to mature=True + # in the first place, so it's not necessary to add those + # assertions here + for index in media_type_config.indexes: + doc = settings.ES.get( + index=index, + id=media.pk, + # get defaults to "realtime", meaning it ignores refreshes + # disable it here to implicitly test that the index was refreshed + # when the document was updated + realtime=False, + ) + assert doc["found"] + assert not doc["_source"]["mature"] + media.refresh_from_db() assert not media.mature -@pytest.mark.parametrize( - "media_type, media_class, report_class, deleted_class, model_factory", - [ - ("image", Image, ImageReport, DeletedImage, ImageFactory), - ("audio", Audio, AudioReport, DeletedAudio, AudioFactory), - ], -) -def test_deindexing_creates_deleted_image_instance( - media_type: MediaType, media_class, report_class, deleted_class, model_factory -): - media = model_factory.create() +def test_deindexing_creates_deleted_image_instance(media_type_config, settings): + media 
= media_type_config.model_factory.create() # Extracting field values because ``media`` will be deleted. image_id = media.id identifier = media.identifier - mock_es = MagicMock() - with patch("django.conf.settings.ES", mock_es): - report_class.objects.create(media_obj=media, reason=DMCA, status=DEINDEXED) + media_type_config.report_factory.create( + media_obj=media, reason=DMCA, status=DEINDEXED + ) - assert deleted_class.objects.filter(media_obj=media).exists() - assert not media_class.objects.filter(identifier=identifier).exists() - assert mock_es.delete.called_with(id=image_id) + assert media_type_config.deleted_class.objects.filter(media_obj=media).exists() + assert not media_type_config.model_class.objects.filter( + identifier=identifier + ).exists() + + for index in media_type_config.indexes: + with pytest.raises(NotFoundError): + settings.ES.get( + index=index, + id=image_id, + # get defaults to "realtime", meaning it ignores refreshes + # disable it here to implicitly test that the index was refreshed + # when the document was updated + realtime=False, + ) def test_all_deleted_media_covered(): @@ -205,202 +147,132 @@ def test_all_mature_media_covered(): assert set(AbstractMatureMedia.__subclasses__()) == {MatureAudio, MatureImage} -@pytest.mark.parametrize( - ("model_factory", "deleted_media_class", "indexes"), - ( - (ImageFactory, DeletedImage, ("image", "image-filtered")), - (AudioFactory, DeletedAudio, ("audio", "audio-filtered")), - ), -) def test_deleted_media_deletes_from_all_indexes( - settings, model_factory, deleted_media_class, indexes + media_type_config, + settings, ): - settings.ES = MagicMock() - media = model_factory.create() + media = media_type_config.model_factory.create() # Need to retrieve this here because the creation of the # deleted media class below will delete this object, rendering # the pk empty by the time we assert the calls media_id = media.pk - instance = deleted_media_class( + instance = media_type_config.deleted_class( 
media_obj=media, ) instance.save() - settings.ES.delete.assert_has_calls( - (call(index=index, id=media_id, refresh=True) for index in indexes), - # The order does not matter - any_order=True, - ) - - -@pytest.mark.parametrize( - ("model_factory", "deleted_media_class", "indexes"), - ( - (ImageFactory, DeletedImage, ("image", "image-filtered")), - (AudioFactory, DeletedAudio, ("audio", "audio-filtered")), - ), -) -def test_deleted_media_ignores_elasticsearch_404_errors( - settings, model_factory, deleted_media_class, indexes -): - settings.ES = MagicMock() - error = TransportError(404, "Whoops, no document!", {}) - settings.ES.delete.side_effect = [None, error] - media = model_factory.create() - # Need to retrieve this here because the creation of the - # deleted media class below will delete this object, rendering - # the pk empty by the time we assert the calls - media_id = media.pk - - instance = deleted_media_class( - media_obj=media, - ) - - instance.save() + for index in media_type_config.indexes: + with pytest.raises(NotFoundError): + settings.ES.get( + index=index, + id=media_id, + realtime=False, + ) - settings.ES.delete.assert_has_calls( - (call(index=index, id=media_id, refresh=True) for index in indexes), - # The order does not matter - any_order=True, - ) +@pook.on +def test_deleted_media_ignores_elasticsearch_404_errors(settings, media_type_config): + media = media_type_config.model_factory.create() -@pytest.mark.parametrize( - ("model_factory", "deleted_media_class", "indexes"), - ( - (ImageFactory, DeletedImage, ("image", "image-filtered")), - (AudioFactory, DeletedAudio, ("audio", "audio-filtered")), - ), -) -def test_deleted_media_raises_elasticsearch_400_errors( - settings, model_factory, deleted_media_class, indexes -): - settings.ES = MagicMock() - error = TransportError(400, "Terrible request, no thanks", {}) - settings.ES.delete.side_effect = [None, error] - media = model_factory.create() - # Need to retrieve this here because the creation of 
the - # deleted media class below will delete this object, rendering - # the pk empty by the time we assert the calls + es_mocks = [] + for index in media_type_config.indexes: + es_mocks.append( + pook.delete(settings.ES_ENDPOINT) + .path(f"/{index}/_doc/{media.pk}") + .param("refresh", "true") + .reply(404) + .mock + ) - instance = deleted_media_class( + # This should succeed despite the 404s forced above + media_type_config.deleted_class.objects.create( media_obj=media, ) - with pytest.raises(TransportError): - instance.save() + for mock in es_mocks: + assert mock.matched, f"{repr(mock.matchers)} did not match!" - settings.ES.delete.assert_has_calls( - (call(index=index, id=media.pk, refresh=True) for index in indexes), - # The order does not matter - any_order=True, - ) +@pook.on +def test_deleted_media_raises_elasticsearch_400_errors(settings, media_type_config): + media = media_type_config.model_factory.create() -@pytest.mark.parametrize( - ("model_factory", "mature_media_class", "indexes"), - ( - (ImageFactory, MatureImage, ("image", "image-filtered")), - (AudioFactory, MatureAudio, ("audio", "audio-filtered")), - ), -) -def test_mature_media_updates_all_indexes( - settings, model_factory, mature_media_class, indexes -): - settings.ES = MagicMock() - media = model_factory.create() + es_mocks: list[pook.Mock] = [] + for index in media_type_config.indexes: + es_mocks.append( + pook.delete(settings.ES_ENDPOINT) + .path(f"/{index}/_doc/{media.pk}") + .param("refresh", "true") + .reply(400) + .mock + ) - instance = mature_media_class( - media_obj=media, - ) + with pytest.raises(BadRequestError): + media_type_config.deleted_class.objects.create( + media_obj=media, + ) - instance.save() - - settings.ES.update.assert_has_calls( - ( - call( - index=index, - id=media.id, - doc={"mature": True}, - refresh=True, - ) - for index in indexes - ), - # The order does not matter - any_order=True, - ) + # Because we're causing a 400, and because that re-raises + # in the 
update, only one of the requests ever gets sent + # Therefore, one should remain pending and at least one + # should have matched + # Take this approach to avoid being concerned with the + # order of the requests, which doesn't matter + assert len([m for m in es_mocks if m.matched]) == 1 -@pytest.mark.parametrize( - ("model_factory", "mature_media_class", "indexes"), - ( - (ImageFactory, MatureImage, ("image", "image-filtered")), - (AudioFactory, MatureAudio, ("audio", "audio-filtered")), - ), -) +@pook.on def test_mature_media_ignores_elasticsearch_404_errors( - settings, model_factory, mature_media_class, indexes + settings, + media_type_config, ): - settings.ES = MagicMock() - error = TransportError(404, "Whoops, no document!", {}) - settings.ES.update.side_effect = [None, error] - media = model_factory.create() + media = media_type_config.model_factory.create() + + es_mocks = [] + for index in media_type_config.indexes: + es_mocks.append( + pook.post(settings.ES_ENDPOINT) + .path(f"/{index}/_update/{media.pk}") + .param("refresh", "true") + .reply(404) + .mock + ) - instance = mature_media_class( + # This should pass despite the 404 enforced above + media_type_config.mature_factory.create( media_obj=media, ) - instance.save() - - settings.ES.update.assert_has_calls( - ( - call( - index=index, - id=media.id, - doc={"mature": True}, - refresh=True, - ) - for index in indexes - ), - # The order does not matter - any_order=True, - ) + for mock in es_mocks: + assert mock.matched, f"{repr(mock.matchers)} did not match!" 
-@pytest.mark.parametrize( - ("model_factory", "mature_media_class", "indexes"), - ( - (ImageFactory, MatureImage, ("image", "image-filtered")), - (AudioFactory, MatureAudio, ("audio", "audio-filtered")), - ), -) -def test_mature_media_reraises_elasticsearch_400_errors( - settings, model_factory, mature_media_class, indexes -): - settings.ES = MagicMock() - error = TransportError(400, "Terrible request, no thanks.", {}) - settings.ES.update.side_effect = [None, error] - media = model_factory.create() +@pook.on +def test_mature_media_reraises_elasticsearch_400_errors(settings, media_type_config): + media = media_type_config.model_factory.create() - instance = mature_media_class( - media_obj=media, - ) + es_mocks = [] + for index in media_type_config.indexes: + es_mocks.append( + pook.post(settings.ES_ENDPOINT) + .path(f"/{index}/_update/{media.pk}") + .param("refresh", "true") + .reply(400) + .mock + ) - with pytest.raises(TransportError): - instance.save() + # This should fail due to the 400 enforced above + with pytest.raises(BadRequestError): + media_type_config.mature_factory.create( + media_obj=media, + ) - settings.ES.update.assert_has_calls( - ( - call( - index=index, - id=media.id, - doc={"mature": True}, - refresh=True, - ) - for index in indexes - ), - # The order does not matter - any_order=True, - ) + # Because we're causing a 400, and because that re-raises + # in the update, only one of the requests ever gets sent + # Therefore, one should remain pending and at least one + # should have matched + # Take this approach to avoid being concerned with the + # order of the requests, which doesn't matter + assert len([m for m in es_mocks if m.matched]) == 1 diff --git a/api/test/unit/serializers/test_media_serializers.py b/api/test/unit/serializers/test_media_serializers.py index 9aea4c65fb1..46edaedea8f 100644 --- a/api/test/unit/serializers/test_media_serializers.py +++ b/api/test/unit/serializers/test_media_serializers.py @@ -248,3 +248,35 @@ def 
test_index_is_only_set_if_matches_media_type( ) assert serializer.is_valid() == is_valid assert serializer.validated_data.get("index") == (index if is_valid else None) + + +@pytest.mark.django_db +def test_report_serializer_maps_sensitive_reason_to_mature(media_type_config): + media = media_type_config.model_factory.create() + serializer = media_type_config.report_serializer( + data={ + "identifier": media.identifier, + "reason": "sensitive", + "description": "Boop beep this is sensitive, whoa!", + } + ) + + serializer.is_valid(raise_exception=True) + + assert serializer.validated_data["reason"] == "mature" + + +@pytest.mark.django_db +def test_report_serializer_accepts_mature_reason(media_type_config): + media = media_type_config.model_factory.create() + serializer = media_type_config.report_serializer( + data={ + "identifier": media.identifier, + "reason": "mature", + "description": "Boop beep this is sensitive, whoa!", + } + ) + + serializer.is_valid(raise_exception=True) + + assert serializer.validated_data["reason"] == "mature" diff --git a/api/test/unit/utils/test_search_context.py b/api/test/unit/utils/test_search_context.py index b20817acf70..25b45848799 100644 --- a/api/test/unit/utils/test_search_context.py +++ b/api/test/unit/utils/test_search_context.py @@ -10,7 +10,7 @@ def test_no_results(media_type_config): search_context = SearchContext.build([], media_type_config.origin_index) - assert search_context == SearchContext(set(), set()) + assert search_context == SearchContext(list(), set()) @pytest.mark.parametrize( @@ -48,27 +48,25 @@ def test_sensitive_text( ) results = [maybe_sensitive_text_hit] + [hit for _, hit in clear_results] + result_ids = [result.identifier for result in results] if not setting_enabled: - es_host = settings.ES.transport.kwargs["host"] - es_port = settings.ES.transport.kwargs["port"] - with pook.post( - f"http://{es_host}:{es_port}/{media_type_config.filtered_index}/_search", + 
f"{settings.ES_ENDPOINT}/{media_type_config.filtered_index}/_search", reply=500, ) as mock: search_context = SearchContext.build( - results, media_type_config.origin_index + result_ids, media_type_config.origin_index ) assert ( mock.total_matches == 0 ), "There should be zero requests to ES if the setting is disabled" pook.off() else: - search_context = SearchContext.build(results, media_type_config.origin_index) + search_context = SearchContext.build(result_ids, media_type_config.origin_index) assert search_context == SearchContext( - {r.identifier for r in results}, + [r.identifier for r in results], {maybe_sensitive_text_model.identifier} if has_sensitive_text and setting_enabled else set(), diff --git a/catalog/DAGs.md b/catalog/DAGs.md index 7774008effb..903237bf3e5 100644 --- a/catalog/DAGs.md +++ b/catalog/DAGs.md @@ -91,6 +91,7 @@ The following are DAGs grouped by their primary tag: | [`freesound_workflow`](#freesound_workflow) | `@quarterly` | `False` | audio | | [`inaturalist_workflow`](#inaturalist_workflow) | `0 0 2 * *` | `False` | image | | [`jamendo_workflow`](#jamendo_workflow) | `@monthly` | `False` | audio | +| [`justtakeitfree_workflow`](#justtakeitfree_workflow) | `@monthly` | `False` | image | | [`metropolitan_museum_workflow`](#metropolitan_museum_workflow) | `@daily` | `True` | image | | `museum_victoria_workflow` | `@monthly` | `False` | image | | [`nappy_workflow`](#nappy_workflow) | `@monthly` | `False` | image | @@ -136,6 +137,7 @@ The following is documentation associated with each DAG (where available): 1. [`image_popularity_refresh`](#image_popularity_refresh) 1. [`inaturalist_workflow`](#inaturalist_workflow) 1. [`jamendo_workflow`](#jamendo_workflow) +1. [`justtakeitfree_workflow`](#justtakeitfree_workflow) 1. [`metropolitan_museum_reingestion_workflow`](#metropolitan_museum_reingestion_workflow) 1. [`metropolitan_museum_workflow`](#metropolitan_museum_workflow) 1. 
[`nappy_workflow`](#nappy_workflow) @@ -586,6 +588,17 @@ non-commercial apps Jamendo Music has more than 500,000 tracks shared by 40,000 artists from over 150 countries all over the world. Audio quality: uploaded as WAV/ FLAC/ AIFF bit depth: 16/24 sample rate: 44.1 or 48 kHz channels: 1/2 +## `justtakeitfree_workflow` + +Content Provider: Justtakeitfree + +ETL Process: Use the API to identify all CC licensed media. + +Output: TSV file containing the media and the respective meta-data. + +Notes: https://justtakeitfree.com/api/api.php This API requires an API key. For +more details, see https://github.com/WordPress/openverse/pull/2793 + ## `metropolitan_museum_reingestion_workflow` Content Provider: Metropolitan Museum of Art @@ -731,10 +744,13 @@ https://www.rawpixel.com/api/v1/search?tags=$publicdomain&page=1&pagesize=100 ## `recreate_audio_popularity_calculation` This file generates Apache Airflow DAGs that, for the given media type, -completely wipe out the PostgreSQL relations and functions involved in -calculating our standardized popularity metric. It then recreates relations and -functions to make the calculation, and performs an initial calculation. The -results are available in the materialized view for that media type. +completely wipe out and recreate the PostgreSQL functions involved in +calculating our standardized popularity metric. + +Note that they do not drop any tables or views related to popularity, and they +do not perform any popularity calculations. Once this DAG has been run, the +associated popularity refresh DAG must be run in order to actually recalculate +popularity constants and standardized popularity scores using the new functions. These DAGs are not on a schedule, and should only be run manually when new SQL code is deployed for the calculation. @@ -742,10 +758,13 @@ code is deployed for the calculation. 
## `recreate_image_popularity_calculation` This file generates Apache Airflow DAGs that, for the given media type, -completely wipe out the PostgreSQL relations and functions involved in -calculating our standardized popularity metric. It then recreates relations and -functions to make the calculation, and performs an initial calculation. The -results are available in the materialized view for that media type. +completely wipe out and recreate the PostgreSQL functions involved in +calculating our standardized popularity metric. + +Note that they do not drop any tables or views related to popularity, and they +do not perform any popularity calculations. Once this DAG has been run, the +associated popularity refresh DAG must be run in order to actually recalculate +popularity constants and standardized popularity scores using the new functions. These DAGs are not on a schedule, and should only be run manually when new SQL code is deployed for the calculation. diff --git a/catalog/dags/common/constants.py b/catalog/dags/common/constants.py index 5ec93f6e9e6..f660d49abd4 100644 --- a/catalog/dags/common/constants.py +++ b/catalog/dags/common/constants.py @@ -1,4 +1,5 @@ import os +from dataclasses import dataclass from datetime import datetime, timedelta from typing import Literal @@ -36,3 +37,41 @@ AWS_RDS_CONN_ID = os.environ.get("AWS_RDS_CONN_ID", AWS_CONN_ID) ES_PROD_HTTP_CONN_ID = "elasticsearch_http_production" REFRESH_POKE_INTERVAL = int(os.getenv("DATA_REFRESH_POKE_INTERVAL", 60 * 30)) + + +@dataclass +class SQLInfo: + """ + Configuration object for a media type's popularity SQL info. 
+ + Required Constructor Arguments: + + media_table: name of the main media table + metrics_table: name of the popularity metrics table + standardized_popularity_fn: name of the standardized_popularity sql + function + popularity_percentile_fn: name of the popularity percentile sql + function + + """ + + media_table: str + metrics_table: str + standardized_popularity_fn: str + popularity_percentile_fn: str + + +SQL_INFO_BY_MEDIA_TYPE = { + AUDIO: SQLInfo( + media_table=AUDIO, + metrics_table="audio_popularity_metrics", + standardized_popularity_fn="standardized_audio_popularity", + popularity_percentile_fn="audio_popularity_percentile", + ), + IMAGE: SQLInfo( + media_table=IMAGE, + metrics_table="image_popularity_metrics", + standardized_popularity_fn="standardized_image_popularity", + popularity_percentile_fn="image_popularity_percentile", + ), +} diff --git a/catalog/dags/common/loader/provider_details.py b/catalog/dags/common/loader/provider_details.py index c219fa21a91..a07108ffc17 100644 --- a/catalog/dags/common/loader/provider_details.py +++ b/catalog/dags/common/loader/provider_details.py @@ -20,6 +20,7 @@ FREESOUND_DEFAULT_PROVIDER = "freesound" INATURALIST_DEFAULT_PROVIDER = "inaturalist" JAMENDO_DEFAULT_PROVIDER = "jamendo" +JUSTTAKEITFREE_IMAGE_PROVIDER = "justtakeitfree" METROPOLITAN_MUSEUM_DEFAULT_PROVIDER = "met" NAPPY_DEFAULT_PROVIDER = "nappy" NYPL_DEFAULT_PROVIDER = "nypl" diff --git a/catalog/dags/common/loader/sql.py b/catalog/dags/common/loader/sql.py index 1e6089fbdb0..9a5fd33dbf9 100644 --- a/catalog/dags/common/loader/sql.py +++ b/catalog/dags/common/loader/sql.py @@ -4,29 +4,24 @@ from airflow.models.abstractoperator import AbstractOperator from psycopg2.errors import InvalidTextRepresentation -from common.constants import AUDIO, IMAGE, MediaType +from common.constants import IMAGE, MediaType, SQLInfo from common.loader import provider_details as prov from common.loader.paths import _extract_media_type -from common.popularity.constants 
import ( - STANDARDIZED_AUDIO_POPULARITY_FUNCTION, - STANDARDIZED_IMAGE_POPULARITY_FUNCTION, -) from common.sql import PostgresHook from common.storage import columns as col from common.storage.columns import NULL, Column, UpsertStrategy -from common.storage.db_columns import AUDIO_TABLE_COLUMNS, IMAGE_TABLE_COLUMNS +from common.storage.db_columns import setup_db_columns_for_media_type from common.storage.tsv_columns import ( COLUMNS, - CURRENT_AUDIO_TSV_COLUMNS, - CURRENT_IMAGE_TSV_COLUMNS, - required_columns, + REQUIRED_COLUMNS, + setup_tsv_columns_for_media_type, ) +from common.utils import setup_sql_info_for_media_type logger = logging.getLogger(__name__) LOAD_TABLE_NAME_STUB = "load_" -TABLE_NAMES = {AUDIO: AUDIO, IMAGE: IMAGE} DB_USER_NAME = "deploy" NOW = "NOW()" FALSE = "'f'" @@ -44,14 +39,6 @@ prov.SMK_DEFAULT_PROVIDER: "1 month 3 days", } -DB_COLUMNS = { - IMAGE: IMAGE_TABLE_COLUMNS, - AUDIO: AUDIO_TABLE_COLUMNS, -} -TSV_COLUMNS = { - AUDIO: CURRENT_AUDIO_TSV_COLUMNS, - IMAGE: CURRENT_IMAGE_TSV_COLUMNS, -} CURRENT_TSV_VERSION = "001" RETURN_ROW_COUNT = lambda c: c.rowcount # noqa: E731 @@ -67,10 +54,13 @@ def create_column_definitions(table_columns: list[Column], is_loading=True): return ",\n ".join(definitions) +@setup_tsv_columns_for_media_type def create_loading_table( postgres_conn_id: str, identifier: str, - media_type: str = IMAGE, + *, + media_type: str, + tsv_columns: list[Column], ): """Create intermediary table and indices if they do not exist.""" load_table = _get_load_table_name(identifier, media_type=media_type) @@ -78,8 +68,7 @@ def create_loading_table( postgres_conn_id=postgres_conn_id, default_statement_timeout=10.0, ) - loading_table_columns = TSV_COLUMNS[media_type] - columns_definition = f"{create_column_definitions(loading_table_columns)}" + columns_definition = f"{create_column_definitions(tsv_columns)}" table_creation_query = dedent( f""" CREATE UNLOGGED TABLE public.{load_table}( @@ -216,7 +205,7 @@ def 
clean_intermediate_table_data( ) missing_columns = 0 - for column in required_columns: + for column in REQUIRED_COLUMNS: missing_columns += postgres.run( f"DELETE FROM {load_table} WHERE {column.db_name} IS NULL;", handler=RETURN_ROW_COUNT, @@ -268,13 +257,16 @@ def _is_tsv_column_from_different_version( ) +@setup_sql_info_for_media_type +@setup_db_columns_for_media_type def upsert_records_to_db_table( postgres_conn_id: str, identifier: str, - db_table: str = None, - media_type: str = IMAGE, + *, + media_type: str, + db_columns: list[Column], + sql_info: SQLInfo, tsv_version: str = CURRENT_TSV_VERSION, - popularity_function: str = STANDARDIZED_IMAGE_POPULARITY_FUNCTION, task: AbstractOperator = None, ): """ @@ -285,35 +277,28 @@ def upsert_records_to_db_table( :param postgres_conn_id :param identifier - :param db_table :param media_type :param tsv_version: The version of TSV being processed. This determines which columns are used in the upsert query. :param task To be automagically passed by airflow. 
:return: """ - if db_table is None: - db_table = TABLE_NAMES.get(media_type, TABLE_NAMES[IMAGE]) - - if media_type is AUDIO: - popularity_function = STANDARDIZED_AUDIO_POPULARITY_FUNCTION - load_table = _get_load_table_name(identifier, media_type=media_type) - logger.info(f"Upserting new records into {db_table}.") + logger.info(f"Upserting new records into {sql_info.media_table}.") postgres = PostgresHook( postgres_conn_id=postgres_conn_id, default_statement_timeout=PostgresHook.get_execution_timeout(task), ) # Remove identifier column - db_columns: list[Column] = DB_COLUMNS[media_type][1:] + db_columns = db_columns[1:] column_inserts = {} column_conflict_values = {} for column in db_columns: args = [] if column.db_name == col.STANDARDIZED_POPULARITY.db_name: args = [ - popularity_function, + sql_info.standardized_popularity_fn, ] if column.upsert_strategy == UpsertStrategy.no_change: @@ -331,13 +316,13 @@ def upsert_records_to_db_table( upsert_conflict_string = ",\n ".join(column_conflict_values.values()) upsert_query = dedent( f""" - INSERT INTO {db_table} AS old + INSERT INTO {sql_info.media_table} AS old ({col.DIRECT_URL.name}, {', '.join(column_inserts.keys())}) SELECT DISTINCT ON ({col.DIRECT_URL.name}) {col.DIRECT_URL.name}, {', '.join(column_inserts.values())} FROM {load_table} as new WHERE NOT EXISTS ( - SELECT {col.DIRECT_URL.name} from {db_table} + SELECT {col.DIRECT_URL.name} from {sql_info.media_table} WHERE {col.DIRECT_URL.name} = new.{col.DIRECT_URL.name} AND MD5({col.FOREIGN_ID.name}) <> MD5(new.{col.FOREIGN_ID.name}) ) diff --git a/catalog/dags/common/popularity/README.md b/catalog/dags/common/popularity/README.md deleted file mode 100644 index 2209225cac0..00000000000 --- a/catalog/dags/common/popularity/README.md +++ /dev/null @@ -1,29 +0,0 @@ -# Popularity - -This code allows for the calculation of image popularity within a provider. For -example, this allows us to boost Flickr results that have more views than -others. 
- -## What this code does - -1. Dump the popularity metrics for each row into a TSV. -2. Compute the 85th percentile for each metric, which is required for the - popularity calculation. This is a heavyweight database calculation, so we - cache it for a really long time. -3. Iterate through the TSV calculating the popularity for each row. -4. UPDATE all rows, setting the `normalized_popularity` key in the `meta_data` - column. - -## To start calculating popularity data for a provider - -1. In your provider script, store the popularity metric you'd like to track in - the `meta_data` column. See - [stocksnap](https://github.com/WordPress/openverse-catalog/blob/6c172033e42a91bcd8f9bf78fd6b933a70bd88bf/openverse_catalog/dags/provider_api_scripts/stocksnap.py#L175-L185) - as an example. -2. Add the provider name and metric to the `IMAGE_POPULARITY_METRICS` constant - in the [popularity/sql.py](sql.py) file. -3. Add the new provider and metric to the - `INSERT INTO public.image_popularity_metrics` statement in - [0004_openledger_image_view.sql](../../../../docker/upstream_db/0004_openledger_image_view.sql#L45). - For now all percentiles should be set to `.85`, this may be adjusted in the - future. 
diff --git a/catalog/dags/common/popularity/constants.py b/catalog/dags/common/popularity/constants.py deleted file mode 100644 index c2588840e44..00000000000 --- a/catalog/dags/common/popularity/constants.py +++ /dev/null @@ -1,7 +0,0 @@ -IMAGE_VIEW_NAME = "image_view" -AUDIO_VIEW_NAME = "audio_view" -AUDIOSET_VIEW_NAME = "audioset_view" -IMAGE_POPULARITY_PERCENTILE_FUNCTION = "image_popularity_percentile" -AUDIO_POPULARITY_PERCENTILE_FUNCTION = "audio_popularity_percentile" -STANDARDIZED_IMAGE_POPULARITY_FUNCTION = "standardized_image_popularity" -STANDARDIZED_AUDIO_POPULARITY_FUNCTION = "standardized_audio_popularity" diff --git a/catalog/dags/common/popularity/sql.py b/catalog/dags/common/popularity/sql.py deleted file mode 100644 index 939a9f65f95..00000000000 --- a/catalog/dags/common/popularity/sql.py +++ /dev/null @@ -1,507 +0,0 @@ -from collections import namedtuple -from datetime import timedelta -from textwrap import dedent - -from airflow.decorators import task, task_group -from airflow.models.abstractoperator import AbstractOperator - -from common.constants import AUDIO, DAG_DEFAULT_ARGS, IMAGE -from common.loader.sql import TABLE_NAMES -from common.popularity.constants import ( - AUDIO_POPULARITY_PERCENTILE_FUNCTION, - AUDIO_VIEW_NAME, - IMAGE_POPULARITY_PERCENTILE_FUNCTION, - IMAGE_VIEW_NAME, - STANDARDIZED_AUDIO_POPULARITY_FUNCTION, - STANDARDIZED_IMAGE_POPULARITY_FUNCTION, -) -from common.sql import PostgresHook, _single_value -from common.storage import columns as col -from common.storage.db_columns import AUDIO_TABLE_COLUMNS, IMAGE_TABLE_COLUMNS - - -DEFAULT_PERCENTILE = 0.85 - -IMAGE_VIEW_ID_IDX = "image_view_identifier_idx" -AUDIO_VIEW_ID_IDX = "audio_view_identifier_idx" -IMAGE_VIEW_PROVIDER_FID_IDX = "image_view_provider_fid_idx" -AUDIO_VIEW_PROVIDER_FID_IDX = "audio_view_provider_fid_idx" - -# Column name constants -VALUE = "val" -CONSTANT = "constant" -FID = col.FOREIGN_ID.db_name -IDENTIFIER = col.IDENTIFIER.db_name -METADATA_COLUMN = 
col.META_DATA.db_name -METRIC = "metric" -PARTITION = col.PROVIDER.db_name -PERCENTILE = "percentile" -PROVIDER = col.PROVIDER.db_name - -Column = namedtuple("Column", ["name", "definition"]) - -IMAGE_POPULARITY_METRICS_TABLE_NAME = "image_popularity_metrics" -AUDIO_POPULARITY_METRICS_TABLE_NAME = "audio_popularity_metrics" - -IMAGE_POPULARITY_METRICS = { - "flickr": {"metric": "views"}, - "nappy": {"metric": "downloads"}, - "rawpixel": {"metric": "download_count"}, - "stocksnap": {"metric": "downloads_raw"}, - "wikimedia": {"metric": "global_usage_count"}, -} - -AUDIO_POPULARITY_METRICS = { - "jamendo": {"metric": "listens"}, - "wikimedia_audio": {"metric": "global_usage_count"}, - "freesound": {"metric": "num_downloads"}, -} - -POPULARITY_METRICS_TABLE_COLUMNS = [ - Column(name=PARTITION, definition="character varying(80) PRIMARY KEY"), - Column(name=METRIC, definition="character varying(80)"), - Column(name=PERCENTILE, definition="float"), - Column(name=VALUE, definition="float"), - Column(name=CONSTANT, definition="float"), -] - -# Further refactoring of this nature will be done in -# https://github.com/WordPress/openverse/issues/2678. 
-POPULARITY_METRICS_BY_MEDIA_TYPE = { - AUDIO: AUDIO_POPULARITY_METRICS, - IMAGE: IMAGE_POPULARITY_METRICS, -} - - -def drop_media_matview( - postgres_conn_id: str, - media_type: str = IMAGE, - db_view: str = IMAGE_VIEW_NAME, - pg_timeout: float = timedelta(minutes=10).total_seconds(), -): - if media_type == AUDIO: - db_view = AUDIO_VIEW_NAME - - postgres = PostgresHook( - postgres_conn_id=postgres_conn_id, default_statement_timeout=pg_timeout - ) - postgres.run(f"DROP MATERIALIZED VIEW IF EXISTS public.{db_view} CASCADE;") - - -def drop_media_popularity_relations( - postgres_conn_id, - media_type=IMAGE, - db_view=IMAGE_VIEW_NAME, - metrics=IMAGE_POPULARITY_METRICS_TABLE_NAME, - pg_timeout: float = timedelta(minutes=10).total_seconds(), -): - if media_type == AUDIO: - db_view = AUDIO_VIEW_NAME - metrics = AUDIO_POPULARITY_METRICS_TABLE_NAME - - postgres = PostgresHook( - postgres_conn_id=postgres_conn_id, default_statement_timeout=pg_timeout - ) - drop_media_view = f"DROP MATERIALIZED VIEW IF EXISTS public.{db_view} CASCADE;" - drop_popularity_metrics = f"DROP TABLE IF EXISTS public.{metrics} CASCADE;" - postgres.run(drop_media_view) - postgres.run(drop_popularity_metrics) - - -def drop_media_popularity_functions( - postgres_conn_id, - media_type=IMAGE, - standardized_popularity=STANDARDIZED_IMAGE_POPULARITY_FUNCTION, - popularity_percentile=IMAGE_POPULARITY_PERCENTILE_FUNCTION, -): - if media_type == AUDIO: - popularity_percentile = AUDIO_POPULARITY_PERCENTILE_FUNCTION - standardized_popularity = STANDARDIZED_AUDIO_POPULARITY_FUNCTION - postgres = PostgresHook( - postgres_conn_id=postgres_conn_id, default_statement_timeout=10.0 - ) - drop_standardized_popularity = ( - f"DROP FUNCTION IF EXISTS public.{standardized_popularity} CASCADE;" - ) - drop_popularity_percentile = ( - f"DROP FUNCTION IF EXISTS public.{popularity_percentile} CASCADE;" - ) - postgres.run(drop_standardized_popularity) - postgres.run(drop_popularity_percentile) - - -def 
create_media_popularity_metrics( - postgres_conn_id, - media_type=IMAGE, - popularity_metrics_table=IMAGE_POPULARITY_METRICS_TABLE_NAME, -): - if media_type == AUDIO: - popularity_metrics_table = AUDIO_POPULARITY_METRICS_TABLE_NAME - postgres = PostgresHook( - postgres_conn_id=postgres_conn_id, default_statement_timeout=10.0 - ) - popularity_metrics_columns_string = ",\n ".join( - f"{c.name} {c.definition}" for c in POPULARITY_METRICS_TABLE_COLUMNS - ) - query = dedent( - f""" - CREATE TABLE public.{popularity_metrics_table} ( - {popularity_metrics_columns_string} - ); - """ - ) - postgres.run(query) - - -@task -def update_media_popularity_metrics( - postgres_conn_id, - media_type=IMAGE, - popularity_metrics=None, - popularity_metrics_table=IMAGE_POPULARITY_METRICS_TABLE_NAME, - popularity_percentile=IMAGE_POPULARITY_PERCENTILE_FUNCTION, - task: AbstractOperator = None, -): - if popularity_metrics is None: - popularity_metrics = POPULARITY_METRICS_BY_MEDIA_TYPE[media_type] - if media_type == AUDIO: - popularity_metrics_table = AUDIO_POPULARITY_METRICS_TABLE_NAME - postgres = PostgresHook( - postgres_conn_id=postgres_conn_id, - default_statement_timeout=PostgresHook.get_execution_timeout(task), - ) - - column_names = [c.name for c in POPULARITY_METRICS_TABLE_COLUMNS] - - # Note that we do not update the val and constant. That is only done during the - # calculation tasks. In other words, we never want to clear out the current value of - # the popularity constant unless we're already done calculating the new one, since - # that can be a time consuming process. 
- updates_string = ",\n ".join( - f"{c}=EXCLUDED.{c}" - for c in column_names - if c not in [PARTITION, CONSTANT, VALUE] - ) - popularity_metric_inserts = _get_popularity_metric_insert_values_string( - popularity_metrics - ) - - query = dedent( - f""" - INSERT INTO public.{popularity_metrics_table} ( - {', '.join(column_names)} - ) VALUES - {popularity_metric_inserts} - ON CONFLICT ({PARTITION}) - DO UPDATE SET - {updates_string} - ; - """ - ) - return postgres.run(query) - - -@task -def calculate_media_popularity_percentile_value( - postgres_conn_id, - provider, - media_type=IMAGE, - popularity_metrics_table=IMAGE_POPULARITY_METRICS_TABLE_NAME, - popularity_percentile=IMAGE_POPULARITY_PERCENTILE_FUNCTION, - task: AbstractOperator = None, -): - if media_type == AUDIO: - popularity_metrics_table = AUDIO_POPULARITY_METRICS_TABLE_NAME - popularity_percentile = AUDIO_POPULARITY_PERCENTILE_FUNCTION - - postgres = PostgresHook( - postgres_conn_id=postgres_conn_id, - default_statement_timeout=PostgresHook.get_execution_timeout(task), - ) - - # Calculate the percentile value. E.g. if `percentile` = 0.80, then we'll - # calculate the _value_ of the 80th percentile for this provider's - # popularity metric. 
- calculate_new_percentile_value_query = dedent( - f""" - SELECT {popularity_percentile}({PARTITION}, {METRIC}, {PERCENTILE}) - FROM {popularity_metrics_table} - WHERE {col.PROVIDER.db_name}='{provider}'; - """ - ) - - return postgres.run(calculate_new_percentile_value_query, handler=_single_value) - - -@task -def update_percentile_and_constants_values_for_provider( - postgres_conn_id, - provider, - raw_percentile_value, - media_type=IMAGE, - popularity_metrics=None, - popularity_metrics_table=IMAGE_POPULARITY_METRICS_TABLE_NAME, - task: AbstractOperator = None, -): - if popularity_metrics is None: - popularity_metrics = POPULARITY_METRICS_BY_MEDIA_TYPE.get(media_type, {}) - if media_type == AUDIO: - popularity_metrics_table = AUDIO_POPULARITY_METRICS_TABLE_NAME - - if raw_percentile_value is None: - # Occurs when a provider has a metric configured, but there are no records - # with any data for that metric. - return - - postgres = PostgresHook( - postgres_conn_id=postgres_conn_id, - default_statement_timeout=PostgresHook.get_execution_timeout(task), - ) - - provider_info = popularity_metrics.get(provider) - percentile = provider_info.get("percentile", DEFAULT_PERCENTILE) - - # Calculate the popularity constant using the percentile value - percentile_value = raw_percentile_value or 1 - new_constant = ((1 - percentile) / (percentile)) * percentile_value - - # Update the percentile value and constant in the metrics table - update_constant_query = dedent( - f""" - UPDATE public.{popularity_metrics_table} - SET {VALUE} = {percentile_value}, {CONSTANT} = {new_constant} - WHERE {col.PROVIDER.db_name} = '{provider}'; - """ - ) - return postgres.run(update_constant_query) - - -@task_group -def update_percentile_and_constants_for_provider( - postgres_conn_id, provider, media_type=IMAGE, execution_timeout=None -): - calculate_percentile_val = calculate_media_popularity_percentile_value.override( - task_id="calculate_percentile_value", - execution_timeout=execution_timeout - 
or DAG_DEFAULT_ARGS.get("execution_timeout"), - )( - postgres_conn_id=postgres_conn_id, - provider=provider, - media_type=media_type, - ) - calculate_percentile_val.doc = ( - "Calculate the percentile popularity value for this provider. For" - " example, if this provider has `percentile`=0.80 and `metric`='views'," - " calculate the 80th percentile value of views for all records for this" - " provider." - ) - - update_metrics_table = update_percentile_and_constants_values_for_provider.override( - task_id="update_percentile_values_and_constant", - )( - postgres_conn_id=postgres_conn_id, - provider=provider, - raw_percentile_value=calculate_percentile_val, - media_type=media_type, - ) - update_metrics_table.doc = ( - "Given the newly calculated percentile value, calculate the" - " popularity constant and update the metrics table with the newly" - " calculated values." - ) - - -def _get_popularity_metric_insert_values_string( - popularity_metrics, - default_percentile=DEFAULT_PERCENTILE, -): - return ",\n ".join( - _format_popularity_metric_insert_tuple_string( - provider, - provider_info["metric"], - provider_info.get("percentile", default_percentile), - ) - for provider, provider_info in popularity_metrics.items() - ) - - -def _format_popularity_metric_insert_tuple_string( - provider, - metric, - percentile, -): - # Default null val and constant - return f"('{provider}', '{metric}', {percentile}, null, null)" - - -def create_media_popularity_percentile_function( - postgres_conn_id, - media_type=IMAGE, - popularity_percentile=IMAGE_POPULARITY_PERCENTILE_FUNCTION, - media_table=TABLE_NAMES[IMAGE], -): - postgres = PostgresHook( - postgres_conn_id=postgres_conn_id, default_statement_timeout=10.0 - ) - if media_type == AUDIO: - popularity_percentile = AUDIO_POPULARITY_PERCENTILE_FUNCTION - media_table = TABLE_NAMES[AUDIO] - query = dedent( - f""" - CREATE OR REPLACE FUNCTION public.{popularity_percentile}( - provider text, pop_field text, percentile float - ) RETURNS 
FLOAT AS $$ - SELECT percentile_disc($3) WITHIN GROUP ( - ORDER BY ({METADATA_COLUMN}->>$2)::float - ) - FROM {media_table} WHERE {PARTITION}=$1; - $$ - LANGUAGE SQL - STABLE - RETURNS NULL ON NULL INPUT; - """ - ) - postgres.run(query) - - -def create_standardized_media_popularity_function( - postgres_conn_id, - media_type=IMAGE, - function_name=STANDARDIZED_IMAGE_POPULARITY_FUNCTION, - popularity_metrics=IMAGE_POPULARITY_METRICS_TABLE_NAME, -): - if media_type == AUDIO: - popularity_metrics = AUDIO_POPULARITY_METRICS_TABLE_NAME - function_name = STANDARDIZED_AUDIO_POPULARITY_FUNCTION - postgres = PostgresHook( - postgres_conn_id=postgres_conn_id, default_statement_timeout=10.0 - ) - query = dedent( - f""" - CREATE OR REPLACE FUNCTION public.{function_name}( - provider text, meta_data jsonb - ) RETURNS FLOAT AS $$ - SELECT ($2->>{METRIC})::float / (($2->>{METRIC})::float + {CONSTANT}) - FROM {popularity_metrics} WHERE provider=$1; - $$ - LANGUAGE SQL - STABLE - RETURNS NULL ON NULL INPUT; - """ - ) - postgres.run(query) - - -def create_media_view( - postgres_conn_id, - media_type=IMAGE, - standardized_popularity_func=STANDARDIZED_IMAGE_POPULARITY_FUNCTION, - table_name=TABLE_NAMES[IMAGE], - db_columns=IMAGE_TABLE_COLUMNS, - db_view_name=IMAGE_VIEW_NAME, - db_view_id_idx=IMAGE_VIEW_ID_IDX, - db_view_provider_fid_idx=IMAGE_VIEW_PROVIDER_FID_IDX, - task: AbstractOperator = None, -): - if media_type == AUDIO: - table_name = TABLE_NAMES[AUDIO] - db_columns = AUDIO_TABLE_COLUMNS - db_view_name = AUDIO_VIEW_NAME - db_view_id_idx = AUDIO_VIEW_ID_IDX - db_view_provider_fid_idx = AUDIO_VIEW_PROVIDER_FID_IDX - standardized_popularity_func = STANDARDIZED_AUDIO_POPULARITY_FUNCTION - postgres = PostgresHook( - postgres_conn_id=postgres_conn_id, - default_statement_timeout=PostgresHook.get_execution_timeout(task), - ) - # We want to copy all columns except standardized popularity, which is calculated - columns_to_select = (", ").join( - [ - column.db_name - for column in 
db_columns - if column.db_name != col.STANDARDIZED_POPULARITY.db_name - ] - ) - create_view_query = dedent( - f""" - CREATE MATERIALIZED VIEW public.{db_view_name} AS - SELECT - {columns_to_select}, - {standardized_popularity_func}( - {table_name}.{PARTITION}, - {table_name}.{METADATA_COLUMN} - ) AS standardized_popularity - FROM {table_name}; - """ - ) - add_idx_query = dedent( - f""" - CREATE UNIQUE INDEX {db_view_id_idx} - ON public.{db_view_name} ({IDENTIFIER}); - CREATE UNIQUE INDEX {db_view_provider_fid_idx} - ON public.{db_view_name} - USING btree({PROVIDER}, md5({FID})); - """ - ) - postgres.run(create_view_query) - postgres.run(add_idx_query) - - -def get_providers_with_popularity_data_for_media_type( - postgres_conn_id: str, - media_type: str = IMAGE, - popularity_metrics: str = IMAGE_POPULARITY_METRICS_TABLE_NAME, - pg_timeout: float = timedelta(minutes=10).total_seconds(), -): - """ - Return a list of distinct `provider`s that support popularity data, - for the given media type. - """ - if media_type == AUDIO: - popularity_metrics = AUDIO_POPULARITY_METRICS_TABLE_NAME - - postgres = PostgresHook( - postgres_conn_id=postgres_conn_id, default_statement_timeout=pg_timeout - ) - providers = postgres.get_records( - f"SELECT DISTINCT provider FROM public.{popularity_metrics};" - ) - - return [x[0] for x in providers] - - -def format_update_standardized_popularity_query( - media_type=IMAGE, - standardized_popularity_func=STANDARDIZED_IMAGE_POPULARITY_FUNCTION, - table_name=TABLE_NAMES[IMAGE], - db_columns=IMAGE_TABLE_COLUMNS, - db_view_name=IMAGE_VIEW_NAME, - db_view_id_idx=IMAGE_VIEW_ID_IDX, - db_view_provider_fid_idx=IMAGE_VIEW_PROVIDER_FID_IDX, - task: AbstractOperator = None, -): - """ - Create a SQL query for updating the standardized popularity for the given - media type. Only the `SET ...` portion of the query is returned, to be used - by a `batched_update` DagRun. 
- """ - if media_type == AUDIO: - table_name = TABLE_NAMES[AUDIO] - standardized_popularity_func = STANDARDIZED_AUDIO_POPULARITY_FUNCTION - - return ( - f"SET {col.STANDARDIZED_POPULARITY.db_name} = {standardized_popularity_func}" - f"({table_name}.{PARTITION}, {table_name}.{METADATA_COLUMN})" - ) - - -def update_db_view( - postgres_conn_id, media_type=IMAGE, db_view_name=IMAGE_VIEW_NAME, task=None -): - if media_type == AUDIO: - db_view_name = AUDIO_VIEW_NAME - postgres = PostgresHook( - postgres_conn_id=postgres_conn_id, - default_statement_timeout=PostgresHook.get_execution_timeout(task), - ) - postgres.run(f"REFRESH MATERIALIZED VIEW CONCURRENTLY {db_view_name};") diff --git a/catalog/dags/common/sql.py b/catalog/dags/common/sql.py index 15e7940099a..f9f2f308bcf 100644 --- a/catalog/dags/common/sql.py +++ b/catalog/dags/common/sql.py @@ -27,7 +27,7 @@ # https://airflow.apache.org/docs/apache-airflow-providers-postgres/stable/_api/airflow/providers/postgres/hooks/postgres/index.html#airflow.providers.postgres.hooks.postgres.PostgresHook.copy_expert # noqa -def _single_value(cursor): +def single_value(cursor): try: row = cursor.fetchone() return row[0] diff --git a/catalog/dags/common/storage/db_columns.py b/catalog/dags/common/storage/db_columns.py index ad3c1bef602..f96c26915d2 100644 --- a/catalog/dags/common/storage/db_columns.py +++ b/catalog/dags/common/storage/db_columns.py @@ -2,7 +2,9 @@ This module contains the lists of database columns in the same order as in the main media tables within the database. 
""" +from common.constants import AUDIO, IMAGE from common.storage import columns as col +from common.utils import setup_kwargs_for_media_type # Columns that are only in the main table; @@ -79,3 +81,10 @@ col.STANDARDIZED_POPULARITY, col.AUDIO_SET_FOREIGN_IDENTIFIER, ] + +DB_COLUMNS_BY_MEDIA_TYPE = {AUDIO: AUDIO_TABLE_COLUMNS, IMAGE: IMAGE_TABLE_COLUMNS} + + +def setup_db_columns_for_media_type(func: callable) -> callable: + """Provide media-type-specific DB columns as a kwarg to the decorated function.""" + return setup_kwargs_for_media_type(DB_COLUMNS_BY_MEDIA_TYPE, "db_columns")(func) diff --git a/catalog/dags/common/storage/tsv_columns.py b/catalog/dags/common/storage/tsv_columns.py index 4d956822864..397d262e843 100644 --- a/catalog/dags/common/storage/tsv_columns.py +++ b/catalog/dags/common/storage/tsv_columns.py @@ -1,6 +1,7 @@ from common.constants import AUDIO, IMAGE from common.storage import columns as col from common.storage.columns import Column +from common.utils import setup_kwargs_for_media_type # Image has 'legacy' 000 version @@ -120,6 +121,13 @@ CURRENT_AUDIO_TSV_COLUMNS: list[Column] = COLUMNS[AUDIO][CURRENT_VERSION[AUDIO]] CURRENT_IMAGE_TSV_COLUMNS: list[Column] = COLUMNS[IMAGE][CURRENT_VERSION[IMAGE]] - # This list is the same for all media types -required_columns = [col for col in CURRENT_IMAGE_TSV_COLUMNS if col.required] +REQUIRED_COLUMNS = [col for col in CURRENT_IMAGE_TSV_COLUMNS if col.required] + + +def setup_tsv_columns_for_media_type(func: callable) -> callable: + """Provide media-type-specific TSV columns as a kwarg to the decorated function.""" + return setup_kwargs_for_media_type( + {AUDIO: CURRENT_AUDIO_TSV_COLUMNS, IMAGE: CURRENT_IMAGE_TSV_COLUMNS}, + "tsv_columns", + )(func) diff --git a/catalog/dags/common/utils.py b/catalog/dags/common/utils.py new file mode 100644 index 00000000000..aa6f238b1b8 --- /dev/null +++ b/catalog/dags/common/utils.py @@ -0,0 +1,80 @@ +import functools +from inspect import _ParameterKind, signature 
+from typing import Any + +from common.constants import SQL_INFO_BY_MEDIA_TYPE + + +def setup_kwargs_for_media_type( + values_by_media_type: dict[str, Any], kwarg_name: str +) -> callable: + """ + Create a decorator which provides media_type-specific information as parameters + for the called function. The called function must itself have a media_type kwarg, + which is used to select values. + + Required arguments: + + values_by_media_type: A dict mapping media types to arbitrary values, which may + themselves be of any type + kwarg_name: The name of the kwarg that will be passed to the called + function + + Usage example: + + @setup_kwargs_for_media_type(MY_VALS_BY_MEDIA_TYPE, 'foo') + def my_fun(media_type, foo = None): + ... + + When `my_fun` is called, if the `foo` kwarg is not passed explicitly, it will be set + to the value of MY_VALS_BY_MEDIA_TYPE[media_type]. An error is raised for an invalid + media type. + """ + + def wrap(func: callable) -> callable: + """ + Provide the appropriate value for the media_type passed in the called function. + If the called function is already explicitly passed a value for `kwarg_name`, + use that value instead. + """ + + # The called function must be supplied a `media_type` keyword-only argument. It + # cannot allow the value to be supplied as a positional argument. + if ( + media_type := signature(func).parameters.get("media_type") + ) is None or media_type.kind != _ParameterKind.KEYWORD_ONLY: + raise Exception( + f"Improperly configured function `{func.__qualname__}`:" + " `media_type` must be a keyword-only argument." + ) + + @functools.wraps(func) + def wrapped(*args, **kwargs): + # First check to see if the called function was already passed a value + # for the given kwarg name. If so, simply use this. 
+ if (media_info := kwargs.pop(kwarg_name, None)) is None: + # The called function should be passed a `media_type`, whose value + # is a key in the values dict + media_type = kwargs.get("media_type", None) + + if media_type not in values_by_media_type.keys(): + raise ValueError( + f"{func.__qualname__}: No values matching media type" + f" `{media_type}`" + ) + + # Get the value corresponding to the media type + media_info = values_by_media_type.get(media_type) + + # Add the media-type-specific info to kwargs, using the passed kwarg name + kwargs[kwarg_name] = media_info + return func(*args, **kwargs) + + return wrapped + + return wrap + + +def setup_sql_info_for_media_type(func: callable) -> callable: + """Provide media-type-specific SQLInfo as a kwarg to the decorated function.""" + return setup_kwargs_for_media_type(SQL_INFO_BY_MEDIA_TYPE, "sql_info")(func) diff --git a/catalog/dags/data_refresh/dag_factory.py b/catalog/dags/data_refresh/dag_factory.py index de8291d6aad..f2fd382574e 100644 --- a/catalog/dags/data_refresh/dag_factory.py +++ b/catalog/dags/data_refresh/dag_factory.py @@ -34,7 +34,7 @@ OPENLEDGER_API_CONN_ID, XCOM_PULL_TEMPLATE, ) -from common.sql import PGExecuteQueryOperator, _single_value +from common.sql import PGExecuteQueryOperator, single_value from data_refresh.data_refresh_task_factory import create_data_refresh_task_group from data_refresh.data_refresh_types import DATA_REFRESH_CONFIGS, DataRefresh from data_refresh.reporting import report_record_difference @@ -93,7 +93,7 @@ def create_data_refresh_dag(data_refresh: DataRefresh, external_dag_ids: Sequenc task_id="get_before_record_count", conn_id=OPENLEDGER_API_CONN_ID, sql=count_sql, - handler=_single_value, + handler=single_value, return_last=True, ) @@ -108,7 +108,7 @@ def create_data_refresh_dag(data_refresh: DataRefresh, external_dag_ids: Sequenc task_id="get_after_record_count", conn_id=OPENLEDGER_API_CONN_ID, sql=count_sql, - handler=_single_value, + handler=single_value, 
return_last=True, ) diff --git a/catalog/dags/data_refresh/recreate_view_data_task_factory.py b/catalog/dags/data_refresh/recreate_view_data_task_factory.py deleted file mode 100644 index 21834ed4f1b..00000000000 --- a/catalog/dags/data_refresh/recreate_view_data_task_factory.py +++ /dev/null @@ -1,81 +0,0 @@ -""" -# Recreate Materialized View Task Factory -This file generates a TaskGroup that recreates the materialized view for a -given media type, using a factory function. - -The task drops and recreates the materialized view, but not the underlying tables. This -means that the only effect is to add or update data (including popularity data) -for records which have been ingested since the last time the view was -refreshed. - -This should be run every time before a data refresh is triggered. -""" -from airflow.operators.python import PythonOperator -from airflow.utils.task_group import TaskGroup -from airflow.utils.trigger_rule import TriggerRule - -from common.constants import POSTGRES_CONN_ID -from common.popularity import sql -from data_refresh import reporting -from data_refresh.data_refresh_types import DataRefresh - - -GROUP_ID = "recreate_matview" -DROP_DB_VIEW_TASK_ID = "drop_materialized_popularity_view" -CREATE_DB_VIEW_TASK_ID = "create_materialized_popularity_view" - - -def create_recreate_view_data_task(data_refresh: DataRefresh): - """ - Create the recreate related tasks. - - The task drops and recreates the materialized view for the given media type. The - view collates popularity data for each record. Recreating has the effect of adding - popularity data for records that were ingested since the last time the view was - created or refreshed, and updating popularity data for existing records. It also - creates a reporting task which will report the status of the matview refresh once - it is complete. - - The view is dropped and recreated rather than refreshed, because refreshing the view - takes much longer in production and times out. 
- - Required Arguments: - - data_refresh: configuration information for the data refresh - """ - with TaskGroup(group_id=GROUP_ID) as recreate_matview: - drop_matview = PythonOperator( - task_id=DROP_DB_VIEW_TASK_ID, - python_callable=sql.drop_media_matview, - op_kwargs={ - "postgres_conn_id": POSTGRES_CONN_ID, - "media_type": data_refresh.media_type, - }, - trigger_rule=TriggerRule.NONE_FAILED_MIN_ONE_SUCCESS, - retries=0, - ) - create_matview = PythonOperator( - task_id=CREATE_DB_VIEW_TASK_ID, - python_callable=sql.create_media_view, - op_kwargs={ - "postgres_conn_id": POSTGRES_CONN_ID, - "media_type": data_refresh.media_type, - }, - execution_timeout=data_refresh.create_materialized_view_timeout, - retries=0, - doc_md=create_recreate_view_data_task.__doc__, - ) - recreate_status = PythonOperator( - task_id=f"report_{GROUP_ID}_status", - python_callable=reporting.report_status, - op_kwargs={ - "media_type": data_refresh.media_type, - "dag_id": data_refresh.dag_id, - "message": "Matview refresh complete | " - "_Next: ingestion server data refresh_", - }, - ) - - drop_matview >> create_matview >> recreate_status - - return recreate_matview diff --git a/catalog/dags/database/batched_update/batched_update.py b/catalog/dags/database/batched_update/batched_update.py index 74c2f789396..d09c2df1d14 100644 --- a/catalog/dags/database/batched_update/batched_update.py +++ b/catalog/dags/database/batched_update/batched_update.py @@ -7,7 +7,7 @@ from common import slack from common.constants import POSTGRES_CONN_ID -from common.sql import PostgresHook, _single_value +from common.sql import PostgresHook, single_value from database.batched_update import constants @@ -44,7 +44,7 @@ def get_expected_update_count( dry_run=dry_run, sql_template=constants.SELECT_TEMP_TABLE_COUNT_QUERY, query_id=query_id, - handler=_single_value, + handler=single_value, ) diff --git a/catalog/dags/database/recreate_popularity_calculation_dag_factory.py 
b/catalog/dags/database/recreate_popularity_calculation_dag_factory.py deleted file mode 100644 index 7381b1d28aa..00000000000 --- a/catalog/dags/database/recreate_popularity_calculation_dag_factory.py +++ /dev/null @@ -1,138 +0,0 @@ -""" -This file generates Apache Airflow DAGs that, for the given media type, -completely wipe out the PostgreSQL relations and functions involved in -calculating our standardized popularity metric. It then recreates relations -and functions to make the calculation, and performs an initial calculation. -The results are available in the materialized view for that media type. - -These DAGs are not on a schedule, and should only be run manually when new -SQL code is deployed for the calculation. -""" -from airflow import DAG -from airflow.operators.python import PythonOperator - -from common.constants import DAG_DEFAULT_ARGS, POSTGRES_CONN_ID -from common.popularity import sql -from data_refresh.data_refresh_types import DATA_REFRESH_CONFIGS, DataRefresh - - -def create_recreate_popularity_calculation_dag(data_refresh: DataRefresh): - media_type = data_refresh.media_type - default_args = { - **DAG_DEFAULT_ARGS, - **data_refresh.default_args, - } - - dag = DAG( - dag_id=f"recreate_{media_type}_popularity_calculation", - default_args=default_args, - max_active_runs=1, - schedule=None, - catchup=False, - doc_md=__doc__, - tags=["database", "data_refresh"], - ) - with dag: - drop_relations = PythonOperator( - task_id="drop_popularity_relations", - python_callable=sql.drop_media_popularity_relations, - op_kwargs={ - "postgres_conn_id": POSTGRES_CONN_ID, - "media_type": media_type, - }, - doc="Drop the existing popularity views and tables.", - ) - - drop_functions = PythonOperator( - task_id="drop_popularity_functions", - python_callable=sql.drop_media_popularity_functions, - op_kwargs={ - "postgres_conn_id": POSTGRES_CONN_ID, - "media_type": media_type, - }, - doc="Drop the existing popularity functions.", - ) - - create_metrics_table = 
PythonOperator( - task_id="create_popularity_metrics_table", - python_callable=sql.create_media_popularity_metrics, - op_kwargs={ - "postgres_conn_id": POSTGRES_CONN_ID, - "media_type": media_type, - }, - doc=( - "Create the popularity metrics table, which stores popularity " - "metrics and target percentiles per provider." - ), - ) - - update_metrics_table = PythonOperator( - task_id="update_popularity_metrics_table", - python_callable=sql.update_media_popularity_metrics, - op_kwargs={ - "postgres_conn_id": POSTGRES_CONN_ID, - "media_type": media_type, - }, - doc="Update the popularity metrics table with values for each provider.", - ) - - create_percentile_function = PythonOperator( - task_id="create_popularity_percentile_function", - python_callable=sql.create_media_popularity_percentile_function, - op_kwargs={ - "postgres_conn_id": POSTGRES_CONN_ID, - "media_type": media_type, - }, - doc=( - "Create the function for calculating popularity percentile values, " - "used for calculating the popularity constants for each provider." - ), - ) - - create_popularity_function = PythonOperator( - task_id="create_standardized_popularity_function", - python_callable=sql.create_standardized_media_popularity_function, - op_kwargs={ - "postgres_conn_id": POSTGRES_CONN_ID, - "media_type": media_type, - }, - doc=( - "Create the function that calculates popularity data for a given " - "record, standardizing across providers with the generated popularity " - "constants." - ), - ) - - create_matview = PythonOperator( - task_id="create_materialized_popularity_view", - python_callable=sql.create_media_view, - op_kwargs={ - "postgres_conn_id": POSTGRES_CONN_ID, - "media_type": media_type, - }, - execution_timeout=data_refresh.create_materialized_view_timeout, - doc=( - "Create the materialized view containing standardized popularity data " - "for all records." 
- ), - ) - - ( - [drop_relations, drop_functions] - >> create_metrics_table - >> [update_metrics_table, create_percentile_function] - >> create_popularity_function - >> create_matview - ) - - return dag - - -# Generate a recreate_popularity_calculation DAG for each DATA_REFRESH_CONFIG. -for data_refresh in DATA_REFRESH_CONFIGS: - recreate_popularity_calculation_dag = create_recreate_popularity_calculation_dag( - data_refresh - ) - globals()[ - recreate_popularity_calculation_dag.dag_id - ] = recreate_popularity_calculation_dag diff --git a/catalog/dags/popularity/dag_factory.py b/catalog/dags/popularity/popularity_refresh_dag_factory.py similarity index 66% rename from catalog/dags/popularity/dag_factory.py rename to catalog/dags/popularity/popularity_refresh_dag_factory.py index dfb55645e04..567e907d2d1 100644 --- a/catalog/dags/popularity/dag_factory.py +++ b/catalog/dags/popularity/popularity_refresh_dag_factory.py @@ -28,17 +28,14 @@ from airflow import DAG from airflow.decorators import task from airflow.operators.trigger_dagrun import TriggerDagRunOperator +from popularity import sql from popularity.popularity_refresh_types import ( POPULARITY_REFRESH_CONFIGS, PopularityRefresh, ) -from popularity.refresh_popularity_metrics_task_factory import ( - create_refresh_popularity_metrics_task_group, -) from common import slack from common.constants import DAG_DEFAULT_ARGS, POSTGRES_CONN_ID -from common.popularity import sql from database.batched_update.constants import DAG_ID as BATCHED_UPDATE_DAG_ID @@ -70,52 +67,6 @@ def get_last_updated_time(): return datetime.utcnow() -@task -def get_providers_update_confs( - postgres_conn_id: str, - popularity_refresh: PopularityRefresh, - last_updated_time: datetime, -): - """ - Build a list of DagRun confs for each provider of this media type. 
The confs will - be used by the `batched_update` DAG to perform a batched update of all existing - records, to recalculate their standardized_popularity with the new popularity - constant. Providers that do not support popularity data are omitted. - """ - # For the media type, get a list of the providers who support popularity data - providers = sql.get_providers_with_popularity_data_for_media_type( - postgres_conn_id, popularity_refresh.media_type - ) - - # For each provider, create a conf that will be used by the batched_update to - # refresh standardized popularity scores. - return [ - { - # Uniquely identify the query - "query_id": ( - f"{provider}_popularity_refresh_{last_updated_time.strftime('%Y%m%d')}" - ), - "table_name": popularity_refresh.media_type, - # Query used to select records that should be refreshed - "select_query": ( - f"WHERE provider='{provider}' AND updated_on <" - f" '{last_updated_time.strftime('%Y-%m-%d %H:%M:%S')}'" - ), - # Query used to update the standardized_popularity - "update_query": sql.format_update_standardized_popularity_query( - popularity_refresh.media_type - ), - "batch_size": 10_000, - "update_timeout": ( - popularity_refresh.refresh_popularity_batch_timeout.total_seconds() - ), - "dry_run": False, - "resume_update": False, - } - for provider in providers - ] - - def create_popularity_refresh_dag(popularity_refresh: PopularityRefresh): """ Instantiate a DAG for a popularity refresh. @@ -144,11 +95,54 @@ def create_popularity_refresh_dag(popularity_refresh: PopularityRefresh): ) with dag: - # Refresh the underlying popularity tables. This step recalculates the - # popularity constants, which will later be used to calculate updated - # standardized popularity scores. 
- refresh_popularity_metrics = create_refresh_popularity_metrics_task_group( - popularity_refresh + update_metrics = sql.update_media_popularity_metrics.override( + task_id="update_popularity_metrics", + )( + postgres_conn_id=POSTGRES_CONN_ID, + media_type=popularity_refresh.media_type, + popularity_metrics=popularity_refresh.popularity_metrics, + ) + update_metrics.doc = ( + "Updates the metrics and target percentiles. If a popularity" + " metric is configured for a new provider, this step will add it" + " to the metrics table." + ) + + update_metrics_status = notify_slack.override( + task_id="report_update_popularity_metrics_status" + )( + text="Popularity metrics update complete | _Next: popularity" + " constants update_", + media_type=popularity_refresh.media_type, + dag_id=popularity_refresh.dag_id, + ) + + update_constants = ( + sql.update_percentile_and_constants_for_provider.override( + group_id="refresh_popularity_metrics_and_constants", + ) + .partial( + postgres_conn_id=POSTGRES_CONN_ID, + media_type=popularity_refresh.media_type, + execution_timeout=popularity_refresh.refresh_metrics_timeout, + popularity_metrics=popularity_refresh.popularity_metrics, + ) + .expand(provider=list(popularity_refresh.popularity_metrics.keys())) + ) + update_constants.doc = ( + "Recalculate the percentile values and popularity constants" + " for each provider, and update them in the metrics table. The" + " popularity constants will be used to calculate standardized" + " popularity scores." 
+ ) + + update_constants_status = notify_slack.override( + task_id="report_update_popularity_metrics_status" + )( + text="Popularity constants update complete | _Next: refresh" + " popularity scores_", + media_type=popularity_refresh.media_type, + dag_id=popularity_refresh.dag_id, ) # Once popularity constants have been calculated, establish the cutoff time @@ -170,7 +164,7 @@ def create_popularity_refresh_dag(popularity_refresh: PopularityRefresh): retries=0, ).expand( # Build the conf for each provider - conf=get_providers_update_confs( + conf=sql.get_providers_update_confs( POSTGRES_CONN_ID, popularity_refresh, get_cutoff_time ) ) @@ -185,12 +179,9 @@ def create_popularity_refresh_dag(popularity_refresh: PopularityRefresh): ) # Set up task dependencies - ( - refresh_popularity_metrics - >> get_cutoff_time - >> refresh_popularity_scores - >> notify_complete - ) + update_metrics >> [update_metrics_status, update_constants] + update_constants >> [update_constants_status, get_cutoff_time] + get_cutoff_time >> refresh_popularity_scores >> notify_complete return dag diff --git a/catalog/dags/popularity/popularity_refresh_types.py b/catalog/dags/popularity/popularity_refresh_types.py index 76d3b4631b2..b25867e9e27 100644 --- a/catalog/dags/popularity/popularity_refresh_types.py +++ b/catalog/dags/popularity/popularity_refresh_types.py @@ -19,7 +19,12 @@ class PopularityRefresh: Required Constructor Arguments: - media_type: str describing the media type to be refreshed. + media_type: str describing the media type to be refreshed. + popularity_metrics: dictionary mapping providers of this media type + to their popularity metrics and, optionally, percentile. If + the percentile key is not included, the default value will + be used. 
+ Ex: {"my_provider": {"metric": "views", "percentile": 0.5}} Optional Constructor Arguments: @@ -45,6 +50,7 @@ class PopularityRefresh: dag_id: str = field(init=False) media_type: str + popularity_metrics: dict default_args: dict | None = field(default_factory=dict) start_date: datetime = datetime(2023, 1, 1) schedule: str | None = "@monthly" @@ -61,11 +67,23 @@ def __post_init__(self): PopularityRefresh( media_type="image", refresh_metrics_timeout=timedelta(hours=24), + popularity_metrics={ + "flickr": {"metric": "views"}, + "nappy": {"metric": "downloads"}, + "rawpixel": {"metric": "download_count"}, + "stocksnap": {"metric": "downloads_raw"}, + "wikimedia": {"metric": "global_usage_count"}, + }, ), PopularityRefresh( media_type="audio", # Poke every minute, instead of every thirty minutes poke_interval=int(os.getenv("DATA_REFRESH_POKE_INTERVAL", 60)), refresh_popularity_timeout=timedelta(days=1), + popularity_metrics={ + "jamendo": {"metric": "listens"}, + "wikimedia_audio": {"metric": "global_usage_count"}, + "freesound": {"metric": "num_downloads"}, + }, ), ] diff --git a/catalog/dags/popularity/recreate_popularity_calculation_dag_factory.py b/catalog/dags/popularity/recreate_popularity_calculation_dag_factory.py new file mode 100644 index 00000000000..31261ec38fc --- /dev/null +++ b/catalog/dags/popularity/recreate_popularity_calculation_dag_factory.py @@ -0,0 +1,79 @@ +""" +This file generates Apache Airflow DAGs that, for the given media type, +completely wipes out and recreates the PostgreSQL functions involved in +calculating our standardized popularity metric. + +Note that they do not drop any tables or views related to popularity, and +they do not perform any popularity calculations. Once this DAG has been run, +the associated popularity refresh DAG must be run in order to actually +recalculate popularity constants and standardized popularity scores using +the new functions. 
+ +These DAGs are not on a schedule, and should only be run manually when new +SQL code is deployed for the calculation. +""" +from airflow import DAG +from popularity import sql +from popularity.popularity_refresh_types import ( + POPULARITY_REFRESH_CONFIGS, + PopularityRefresh, +) + +from common.constants import DAG_DEFAULT_ARGS, POSTGRES_CONN_ID + + +def create_recreate_popularity_calculation_dag(popularity_refresh: PopularityRefresh): + media_type = popularity_refresh.media_type + default_args = { + **DAG_DEFAULT_ARGS, + **popularity_refresh.default_args, + } + + dag = DAG( + dag_id=f"recreate_{media_type}_popularity_calculation", + default_args=default_args, + max_active_runs=1, + schedule=None, + catchup=False, + doc_md=__doc__, + tags=["database", "data_refresh"], + ) + with dag: + drop_functions = sql.drop_media_popularity_functions( + postgres_conn_id=POSTGRES_CONN_ID, + media_type=media_type, + ) + drop_functions.doc = "Drop the existing popularity functions." + + create_percentile_function = sql.create_media_popularity_percentile_function( + postgres_conn_id=POSTGRES_CONN_ID, + media_type=media_type, + ) + create_percentile_function.doc = ( + "Create the function for calculating popularity percentile values, " + "used for calculating the popularity constants for each provider." + ) + + create_popularity_function = sql.create_standardized_media_popularity_function( + postgres_conn_id=POSTGRES_CONN_ID, + media_type=media_type, + ) + create_popularity_function.doc = ( + "Create the function that calculates popularity data for a given " + "record, standardizing across providers with the generated popularity " + "constants." + ) + + (drop_functions >> create_percentile_function >> create_popularity_function) + + return dag + + +# Generate a recreate_popularity_calculation DAG for each POPULARITY_REFRESH_CONFIGS. 
+for popularity_refresh in POPULARITY_REFRESH_CONFIGS: + recreate_popularity_calculation_dag = create_recreate_popularity_calculation_dag( + popularity_refresh + ) + globals()[ + recreate_popularity_calculation_dag.dag_id + ] = recreate_popularity_calculation_dag diff --git a/catalog/dags/popularity/refresh_popularity_metrics_task_factory.py b/catalog/dags/popularity/refresh_popularity_metrics_task_factory.py deleted file mode 100644 index 428e23f7456..00000000000 --- a/catalog/dags/popularity/refresh_popularity_metrics_task_factory.py +++ /dev/null @@ -1,108 +0,0 @@ -""" -# Refresh Popularity Metrics TaskGroup Factory -This file generates a TaskGroup that refreshes the underlying popularity DB -tables, using a factory function. - -This step updates any changes to popularity metrics, and recalculates the -popularity constants. It should be run at least once every month, or whenever -a new popularity metric is added. Scheduling is handled in the parent data -refresh DAG. -""" -from airflow.operators.python import PythonOperator -from airflow.utils.task_group import TaskGroup -from popularity.popularity_refresh_types import PopularityRefresh - -from common.constants import POSTGRES_CONN_ID -from common.popularity import sql -from data_refresh import reporting -from data_refresh.data_refresh_types import DataRefresh - - -GROUP_ID = "refresh_popularity_metrics_and_constants" -UPDATE_MEDIA_POPULARITY_METRICS_TASK_ID = "update_media_popularity_metrics" -UPDATE_MEDIA_POPULARITY_CONSTANTS_TASK_ID = "update_media_popularity_constants" - - -def create_refresh_popularity_metrics_task_group( - refresh_config: DataRefresh | PopularityRefresh, -): - """ - Create tasks related to refreshing popularity statistics. - - This factory method instantiates a TaskGroup that will update the popularity - DB tables for the given media type, including percentiles and popularity - metrics. 
It also creates a reporting tasks which will report the status of the - various steps once they complete. - - Required Arguments: - - refresh_config: configuration data for the refresh - """ - media_type = refresh_config.media_type - execution_timeout = refresh_config.refresh_metrics_timeout - - with TaskGroup(group_id=GROUP_ID) as refresh_all_popularity_data: - update_metrics = sql.update_media_popularity_metrics.override( - task_id=UPDATE_MEDIA_POPULARITY_METRICS_TASK_ID, - execution_timeout=execution_timeout, - )( - postgres_conn_id=POSTGRES_CONN_ID, - media_type=media_type, - ) - update_metrics.doc = ( - "Updates the metrics and target percentiles. If a popularity" - " metric is configured for a new provider, this step will add it" - " to the metrics table." - ) - - update_metrics_status = PythonOperator( - task_id=f"report_{UPDATE_MEDIA_POPULARITY_METRICS_TASK_ID}_status", - python_callable=reporting.report_status, - op_kwargs={ - "media_type": media_type, - "dag_id": refresh_config.dag_id, - "message": "Popularity metrics update complete | " - "_Next: popularity constants update_", - }, - ) - - update_constants = ( - sql.update_percentile_and_constants_for_provider.override( - group_id=UPDATE_MEDIA_POPULARITY_CONSTANTS_TASK_ID, - ) - .partial( - postgres_conn_id=POSTGRES_CONN_ID, - media_type=media_type, - execution_timeout=execution_timeout, - ) - .expand( - provider=[ - provider - for provider in sql.POPULARITY_METRICS_BY_MEDIA_TYPE[ - media_type - ].keys() - ] - ) - ) - update_constants.doc = ( - "Recalculate the percentile values and popularity constants" - " for each provider, and update them in the metrics table. The" - " popularity constants will be used to calculate standardized" - " popularity scores." 
- ) - - update_constants_status = PythonOperator( - task_id=f"report_{UPDATE_MEDIA_POPULARITY_CONSTANTS_TASK_ID}_status", - python_callable=reporting.report_status, - op_kwargs={ - "media_type": media_type, - "dag_id": refresh_config.dag_id, - "message": "Popularity constants update complete | " - "_Next: refresh matview_", - }, - ) - - update_metrics >> [update_metrics_status, update_constants] - update_constants >> update_constants_status - - return refresh_all_popularity_data diff --git a/catalog/dags/popularity/sql.py b/catalog/dags/popularity/sql.py new file mode 100644 index 00000000000..bacf3bf6291 --- /dev/null +++ b/catalog/dags/popularity/sql.py @@ -0,0 +1,366 @@ +from collections import namedtuple +from datetime import datetime, timedelta +from textwrap import dedent + +from airflow.decorators import task, task_group +from airflow.models.abstractoperator import AbstractOperator +from popularity.popularity_refresh_types import PopularityRefresh + +from common.constants import DAG_DEFAULT_ARGS, SQLInfo +from common.sql import PostgresHook, single_value +from common.storage import columns as col +from common.utils import setup_sql_info_for_media_type + + +DEFAULT_PERCENTILE = 0.85 + + +# Column name constants +VALUE = "val" +CONSTANT = "constant" +FID = col.FOREIGN_ID.db_name +IDENTIFIER = col.IDENTIFIER.db_name +METADATA_COLUMN = col.META_DATA.db_name +METRIC = "metric" +PARTITION = col.PROVIDER.db_name +PERCENTILE = "percentile" +PROVIDER = col.PROVIDER.db_name + +Column = namedtuple("Column", ["name", "definition"]) + + +POPULARITY_METRICS_TABLE_COLUMNS = [ + Column(name=PARTITION, definition="character varying(80) PRIMARY KEY"), + Column(name=METRIC, definition="character varying(80)"), + Column(name=PERCENTILE, definition="float"), + Column(name=VALUE, definition="float"), + Column(name=CONSTANT, definition="float"), +] + + +@task +@setup_sql_info_for_media_type +def drop_media_popularity_functions( + postgres_conn_id: str, *, media_type: str, 
sql_info: SQLInfo = None +): + postgres = PostgresHook( + postgres_conn_id=postgres_conn_id, default_statement_timeout=10.0 + ) + postgres.run( + f"DROP FUNCTION IF EXISTS public.{sql_info.standardized_popularity_fn} CASCADE;" + ) + postgres.run( + f"DROP FUNCTION IF EXISTS public.{sql_info.popularity_percentile_fn} CASCADE;" + ) + + +@task +@setup_sql_info_for_media_type +def create_media_popularity_metrics( + postgres_conn_id: str, *, media_type: str, sql_info: SQLInfo = None +): + postgres = PostgresHook( + postgres_conn_id=postgres_conn_id, default_statement_timeout=10.0 + ) + popularity_metrics_columns_string = ",\n ".join( + f"{c.name} {c.definition}" for c in POPULARITY_METRICS_TABLE_COLUMNS + ) + query = dedent( + f""" + CREATE TABLE public.{sql_info.metrics_table} ( + {popularity_metrics_columns_string} + ); + """ + ) + postgres.run(query) + + +@task +@setup_sql_info_for_media_type +def update_media_popularity_metrics( + postgres_conn_id: str, + popularity_metrics: dict, + *, + media_type: str, + sql_info: SQLInfo = None, + task: AbstractOperator = None, +): + postgres = PostgresHook( + postgres_conn_id=postgres_conn_id, + default_statement_timeout=PostgresHook.get_execution_timeout(task), + ) + + column_names = [c.name for c in POPULARITY_METRICS_TABLE_COLUMNS] + + # Note that we do not update the val and constant. That is only done during the + # calculation tasks. In other words, we never want to clear out the current value of + # the popularity constant unless we're already done calculating the new one, since + # that can be a time consuming process. 
+ updates_string = ",\n ".join( + f"{c}=EXCLUDED.{c}" + for c in column_names + if c not in [PARTITION, CONSTANT, VALUE] + ) + popularity_metric_inserts = _get_popularity_metric_insert_values_string( + popularity_metrics + ) + + query = dedent( + f""" + INSERT INTO public.{sql_info.metrics_table} ( + {', '.join(column_names)} + ) VALUES + {popularity_metric_inserts} + ON CONFLICT ({PARTITION}) + DO UPDATE SET + {updates_string} + ; + """ + ) + return postgres.run(query) + + +@task +@setup_sql_info_for_media_type +def calculate_media_popularity_percentile_value( + postgres_conn_id: str, + provider: str, + *, + media_type: str, + sql_info: SQLInfo = None, + task: AbstractOperator = None, +): + postgres = PostgresHook( + postgres_conn_id=postgres_conn_id, + default_statement_timeout=PostgresHook.get_execution_timeout(task), + ) + + # Calculate the percentile value. E.g. if `percentile` = 0.80, then we'll + # calculate the _value_ of the 80th percentile for this provider's + # popularity metric. + calculate_new_percentile_value_query = dedent( + f""" + SELECT {sql_info.popularity_percentile_fn}({PARTITION}, {METRIC}, {PERCENTILE}) + FROM {sql_info.metrics_table} + WHERE {col.PROVIDER.db_name}='{provider}'; + """ + ) + + return postgres.run(calculate_new_percentile_value_query, handler=single_value) + + +@task +@setup_sql_info_for_media_type +def update_percentile_and_constants_values_for_provider( + postgres_conn_id: str, + provider: str, + raw_percentile_value: float, + popularity_metrics: dict, + *, + media_type: str, + sql_info: SQLInfo = None, + task: AbstractOperator = None, +): + if raw_percentile_value is None: + # Occurs when a provider has a metric configured, but there are no records + # with any data for that metric. 
+ return + + postgres = PostgresHook( + postgres_conn_id=postgres_conn_id, + default_statement_timeout=PostgresHook.get_execution_timeout(task), + ) + + provider_info = popularity_metrics.get(provider) + percentile = provider_info.get("percentile", DEFAULT_PERCENTILE) + + # Calculate the popularity constant using the percentile value + percentile_value = raw_percentile_value or 1 + new_constant = ((1 - percentile) / (percentile)) * percentile_value + + # Update the percentile value and constant in the metrics table + update_constant_query = dedent( + f""" + UPDATE public.{sql_info.metrics_table} + SET {VALUE} = {percentile_value}, {CONSTANT} = {new_constant} + WHERE {col.PROVIDER.db_name} = '{provider}'; + """ + ) + return postgres.run(update_constant_query) + + +@task_group +def update_percentile_and_constants_for_provider( + postgres_conn_id: str, + provider: str, + media_type: str, + popularity_metrics: dict, + execution_timeout: timedelta = None, +): + calculate_percentile_val = calculate_media_popularity_percentile_value.override( + task_id="calculate_percentile_value", + execution_timeout=execution_timeout + or DAG_DEFAULT_ARGS.get("execution_timeout"), + )( + postgres_conn_id=postgres_conn_id, + provider=provider, + media_type=media_type, + ) + calculate_percentile_val.doc = ( + "Calculate the percentile popularity value for this provider. For" + " example, if this provider has `percentile`=0.80 and `metric`='views'," + " calculate the 80th percentile value of views for all records for this" + " provider." 
+ ) + + update_metrics_table = update_percentile_and_constants_values_for_provider.override( + task_id="update_percentile_values_and_constant", + )( + postgres_conn_id=postgres_conn_id, + provider=provider, + raw_percentile_value=calculate_percentile_val, + media_type=media_type, + popularity_metrics=popularity_metrics, + ) + update_metrics_table.doc = ( + "Given the newly calculated percentile value, calculate the" + " popularity constant and update the metrics table with the newly" + " calculated values." + ) + + +def _get_popularity_metric_insert_values_string( + popularity_metrics: dict, + default_percentile: float = DEFAULT_PERCENTILE, +) -> str: + return ",\n ".join( + _format_popularity_metric_insert_tuple_string( + provider, + provider_info["metric"], + provider_info.get("percentile", default_percentile), + ) + for provider, provider_info in popularity_metrics.items() + ) + + +def _format_popularity_metric_insert_tuple_string( + provider: str, + metric: str, + percentile: float, +): + # Default null val and constant + return f"('{provider}', '{metric}', {percentile}, null, null)" + + +@task +@setup_sql_info_for_media_type +def create_media_popularity_percentile_function( + postgres_conn_id: str, + *, + media_type: str, + sql_info: SQLInfo = None, +): + postgres = PostgresHook( + postgres_conn_id=postgres_conn_id, default_statement_timeout=10.0 + ) + + query = dedent( + f""" + CREATE OR REPLACE FUNCTION public.{sql_info.popularity_percentile_fn}( + provider text, pop_field text, percentile float + ) RETURNS FLOAT AS $$ + SELECT percentile_disc($3) WITHIN GROUP ( + ORDER BY ({METADATA_COLUMN}->>$2)::float + ) + FROM {sql_info.media_table} WHERE {PARTITION}=$1; + $$ + LANGUAGE SQL + STABLE + RETURNS NULL ON NULL INPUT; + """ + ) + postgres.run(query) + + +@task +@setup_sql_info_for_media_type +def create_standardized_media_popularity_function( + postgres_conn_id: str, *, media_type: str, sql_info: SQLInfo = None +): + postgres = PostgresHook( + 
postgres_conn_id=postgres_conn_id, default_statement_timeout=10.0 + ) + query = dedent( + f""" + CREATE OR REPLACE FUNCTION public.{sql_info.standardized_popularity_fn}( + provider text, meta_data jsonb + ) RETURNS FLOAT AS $$ + SELECT ($2->>{METRIC})::float / (($2->>{METRIC})::float + {CONSTANT}) + FROM {sql_info.metrics_table} WHERE provider=$1; + $$ + LANGUAGE SQL + STABLE + RETURNS NULL ON NULL INPUT; + """ + ) + postgres.run(query) + + +@setup_sql_info_for_media_type +def format_update_standardized_popularity_query( + *, + media_type: str, + sql_info: SQLInfo = None, +) -> str: + """ + Create a SQL query for updating the standardized popularity for the given + media type. Only the `SET ...` portion of the query is returned, to be used + by a `batched_update` DagRun. + """ + return ( + f"SET {col.STANDARDIZED_POPULARITY.db_name} =" + f" {sql_info.standardized_popularity_fn}({sql_info.media_table}.{PARTITION}," + f" {sql_info.media_table}.{METADATA_COLUMN})" + ) + + +@task +def get_providers_update_confs( + postgres_conn_id: str, + popularity_refresh: PopularityRefresh, + last_updated_time: datetime, +) -> list[dict]: + """ + Build a list of DagRun confs for each provider of this media type. The confs will + be used by the `batched_update` DAG to perform a batched update of all existing + records, to recalculate their standardized_popularity with the new popularity + constant. Providers that do not support popularity data are omitted. + """ + + # For each provider, create a conf that will be used by the batched_update to + # refresh standardized popularity scores. 
+ return [ + { + # Uniquely identify the query + "query_id": ( + f"{provider}_popularity_refresh_{last_updated_time.strftime('%Y%m%d')}" + ), + "table_name": popularity_refresh.media_type, + # Query used to select records that should be refreshed + "select_query": ( + f"WHERE provider='{provider}' AND updated_on <" + f" '{last_updated_time.strftime('%Y-%m-%d %H:%M:%S')}'" + ), + # Query used to update the standardized_popularity + "update_query": format_update_standardized_popularity_query( + media_type=popularity_refresh.media_type + ), + "batch_size": 10_000, + "update_timeout": ( + popularity_refresh.refresh_popularity_batch_timeout.total_seconds() + ), + "dry_run": False, + "resume_update": False, + } + for provider in popularity_refresh.popularity_metrics.keys() + ] diff --git a/catalog/dags/providers/provider_api_scripts/justtakeitfree.py b/catalog/dags/providers/provider_api_scripts/justtakeitfree.py new file mode 100644 index 00000000000..7b0bafbea55 --- /dev/null +++ b/catalog/dags/providers/provider_api_scripts/justtakeitfree.py @@ -0,0 +1,94 @@ +""" +Content Provider: Justtakeitfree + +ETL Process: Use the API to identify all CC licensed media. + +Output: TSV file containing the media and the + respective meta-data. + +Notes: https://justtakeitfree.com/api/api.php +This API requires an API key. 
For more details, see https://github.com/WordPress/openverse/pull/2793 +""" +import logging + +from airflow.models import Variable + +from common.constants import IMAGE +from common.licenses import get_license_info +from common.loader import provider_details as prov +from providers.provider_api_scripts.provider_data_ingester import ProviderDataIngester + + +logger = logging.getLogger(__name__) + + +class JusttakeitfreeDataIngester(ProviderDataIngester): + providers = { + "image": prov.JUSTTAKEITFREE_IMAGE_PROVIDER, + } + endpoint = "https://justtakeitfree.com/api/api.php" + creator = "Justtakeitfree Free Photos" + creator_url = "https://justtakeitfree.com" + + def get_next_query_params(self, prev_query_params: dict | None, **kwargs) -> dict: + if not prev_query_params: + return {"page": 1, "key": Variable.get("API_KEY_JUSTTAKEITFREE")} + else: + return { + **prev_query_params, + "page": prev_query_params["page"] + 1, + } + + def get_batch_data(self, response_json) -> list[list[dict]] | None: + if response_json and (data := response_json.get("data")): + return data + return None + + def get_media_type(self, record: dict): + return IMAGE + + def get_record_data(self, data: list[dict]) -> dict | None: + data = data[0] + if not (foreign_landing_url := data.get("page_link")): + return None + + if not (foreign_identifier := foreign_landing_url.split("/")[-2]): + return None + + if not (url := data.get("full_image_link")): + return None + + license_url = data.get("license_link", "").replace("deed.en", "") + license_info = get_license_info(license_url) + if license_info is None: + return None + + raw_record_data = { + "foreign_landing_url": foreign_landing_url, + "url": url, + "license_info": license_info, + "foreign_identifier": foreign_identifier, + # Optional fields + "creator": self.creator, + "creator_url": self.creator_url, + "raw_tags": data.get("tags"), + "filesize": self.get_file_info(url), + "thumbnail_url": data.get("preview_link"), + } + return {k: v for k, v 
in raw_record_data.items() if v is not None} + + def get_file_info(self, url) -> int | None: + """Get the image size in bytes.""" + resp = self.delayed_requester.head(url) + if resp: + filesize = int(resp.headers.get("Content-Length", 0)) + return filesize if filesize != 0 else None + + +def main(): + ingester = JusttakeitfreeDataIngester() + ingester.ingest_records() + + +if __name__ == "__main__": + main() diff --git a/catalog/dags/providers/provider_workflows.py b/catalog/dags/providers/provider_workflows.py index 0d50c0352cd..81f8c1e55e0 100644 --- a/catalog/dags/providers/provider_workflows.py +++ b/catalog/dags/providers/provider_workflows.py @@ -16,6 +16,7 @@ from providers.provider_api_scripts.freesound import FreesoundDataIngester from providers.provider_api_scripts.inaturalist import INaturalistDataIngester from providers.provider_api_scripts.jamendo import JamendoDataIngester +from providers.provider_api_scripts.justtakeitfree import JusttakeitfreeDataIngester from providers.provider_api_scripts.metropolitan_museum import MetMuseumDataIngester from providers.provider_api_scripts.museum_victoria import VictoriaDataIngester from providers.provider_api_scripts.nappy import NappyDataIngester @@ -236,6 +237,10 @@ def __post_init__(self): ProviderWorkflow( ingester_class=JamendoDataIngester, ), + ProviderWorkflow( + ingester_class=JusttakeitfreeDataIngester, + start_date=datetime(2023, 9, 1), + ), ProviderWorkflow( ingester_class=MetMuseumDataIngester, start_date=datetime(2016, 9, 1), diff --git a/catalog/env.template b/catalog/env.template index c1df4bd87a3..4ad3b05bb16 100644 --- a/catalog/env.template +++ b/catalog/env.template @@ -32,6 +32,7 @@ AIRFLOW_VAR_API_KEY_EUROPEANA=not_set AIRFLOW_VAR_API_KEY_FLICKR=not_set AIRFLOW_VAR_API_KEY_FREESOUND=not_set AIRFLOW_VAR_API_KEY_JAMENDO=not_set +AIRFLOW_VAR_API_KEY_JUSTTAKEITFREE=not_set AIRFLOW_VAR_API_KEY_NYPL=not_set AIRFLOW_VAR_API_KEY_RAWPIXEL=not_set AIRFLOW_VAR_API_KEY_THINGIVERSE=not_set diff --git 
a/catalog/tests/dags/common/conftest.py b/catalog/tests/dags/common/conftest.py index 1a97d5b3a77..c36f42a7209 100644 --- a/catalog/tests/dags/common/conftest.py +++ b/catalog/tests/dags/common/conftest.py @@ -1,20 +1,14 @@ import socket -from datetime import datetime, timedelta from urllib.parse import urlparse import boto3 import pytest -from airflow import DAG -from airflow.models.abstractoperator import AbstractOperator -from airflow.operators.python import PythonOperator from catalog.tests.dags.common.loader.test_s3 import ( ACCESS_KEY, S3_LOCAL_ENDPOINT, SECRET_KEY, ) -from common.constants import POSTGRES_CONN_ID -from common.sql import PGExecuteQueryOperator, PostgresHook POSTGRES_TEST_CONN_ID = "postgres_openledger_testing" @@ -56,88 +50,3 @@ def empty_s3_bucket(request): bucket.create() yield bucket _delete_bucket(bucket) - - -@pytest.fixture -def identifier(request): - return f"{hash(request.node.name)}".replace("-", "_") - - -@pytest.fixture -def image_table(identifier): - # Parallelized tests need to use distinct database tables - return f"image_{identifier}" - - -TEST_SQL = "SELECT PG_SLEEP(1);" - - -def timed_pg_hook_sleeper( - task, - statement_timeout: float = None, -): - pg = PostgresHook( - default_statement_timeout=PostgresHook.get_execution_timeout(task), - conn_id=POSTGRES_CONN_ID, - ) - pg.run(sql=TEST_SQL, statement_timeout=statement_timeout) - - -def mapped_select_pg_hook( - select_val: int, - task: AbstractOperator, -): - pg = PostgresHook( - default_statement_timeout=PostgresHook.get_execution_timeout(task), - conn_id=POSTGRES_CONN_ID, - ) - return pg.run(f"select {select_val};") - - -def create_pg_timeout_tester_dag(): - with DAG( - dag_id="a_pg_timeout_tester", - schedule=None, - doc_md="DAG to test query timeouts in postgres", - start_date=datetime(2023, 1, 1), - ) as dag: - pg_operator_happy = PGExecuteQueryOperator( - task_id="pg_operator_happy", - retries=0, - conn_id=POSTGRES_CONN_ID, - sql=TEST_SQL, - 
execution_timeout=timedelta(seconds=2), - doc_md="Custom PG operator, with query finished before execution timeout", - ) - pg_hook_happy = PythonOperator( - task_id="pg_hook_happy", - retries=0, - python_callable=timed_pg_hook_sleeper, - execution_timeout=timedelta(hours=2), - doc_md="Custom PG hook, with query finished before execution timeout", - ) - pg_hook_no_timeout = PythonOperator( - task_id="pg_hook_no_timeout", - retries=0, - python_callable=timed_pg_hook_sleeper, - doc_md="Custom PG hook, with no execution timeout", - ) - pg_operator_mapped = PythonOperator.partial( - task_id="pg_operator_mapped", - retries=0, - execution_timeout=timedelta(minutes=1), - doc_md="Custom PG operator, mapped to list", - python_callable=mapped_select_pg_hook, - ).expand(op_args=[(1,), (2,)]) - [pg_operator_happy, pg_hook_happy, pg_hook_no_timeout, pg_operator_mapped] - return dag - - -@pytest.fixture(scope="session") -def mock_timeout_dag(): - return create_pg_timeout_tester_dag() - - -@pytest.fixture(scope="session") -def mock_pg_hook_task(mock_timeout_dag) -> PythonOperator: - return mock_timeout_dag.get_task("pg_hook_happy") diff --git a/catalog/tests/dags/common/loader/test_sql.py b/catalog/tests/dags/common/loader/test_sql.py index 4569c77e46a..ede644e31d3 100644 --- a/catalog/tests/dags/common/loader/test_sql.py +++ b/catalog/tests/dags/common/loader/test_sql.py @@ -11,10 +11,7 @@ from psycopg2.errors import InvalidTextRepresentation from catalog.tests.dags.common.conftest import POSTGRES_TEST_CONN_ID as POSTGRES_CONN_ID -from catalog.tests.dags.common.popularity.test_sql import ( - TableInfo, - _set_up_std_popularity_func, -) +from catalog.tests.dags.popularity.test_sql import _set_up_std_popularity_func from catalog.tests.test_utils import sql as utils from common.loader import sql from common.storage import columns as col @@ -29,22 +26,6 @@ def load_table(identifier): return f"load_image_{identifier}" -@pytest.fixture -def table_info( - image_table, - identifier, -) 
-> TableInfo: - return TableInfo( - image=image_table, - image_view=f"image_view_{identifier}", - metrics=f"image_popularity_metrics_{identifier}", - standardized_popularity=f"standardized_popularity_{identifier}", - popularity_percentile=f"popularity_percentile_{identifier}", - image_view_idx=f"test_view_id_{identifier}_idx", - provider_fid_idx=f"test_view_provider_fid_{identifier}_idx", - ) - - @pytest.fixture def postgres(load_table) -> utils.PostgresRef: conn = psycopg2.connect(utils.POSTGRES_TEST_URI) @@ -73,26 +54,24 @@ def postgres_with_load_table( @pytest.fixture -def postgres_with_load_and_image_table( - load_table, image_table, table_info, mock_pg_hook_task -): +def postgres_with_load_and_image_table(load_table, sql_info, mock_pg_hook_task): conn = psycopg2.connect(utils.POSTGRES_TEST_URI) cur = conn.cursor() drop_test_relations_query = f""" DROP TABLE IF EXISTS {load_table} CASCADE; - DROP TABLE IF EXISTS {image_table} CASCADE; - DROP INDEX IF EXISTS {image_table}_provider_fid_idx; - DROP TABLE IF EXISTS {table_info.metrics} CASCADE; - DROP FUNCTION IF EXISTS {table_info.standardized_popularity} CASCADE; - DROP FUNCTION IF EXISTS {table_info.popularity_percentile} CASCADE; + DROP TABLE IF EXISTS {sql_info.media_table} CASCADE; + DROP INDEX IF EXISTS {sql_info.media_table}_provider_fid_idx; + DROP TABLE IF EXISTS {sql_info.metrics_table} CASCADE; + DROP FUNCTION IF EXISTS {sql_info.standardized_popularity_fn} CASCADE; + DROP FUNCTION IF EXISTS {sql_info.popularity_percentile_fn} CASCADE; """ cur.execute(drop_test_relations_query) cur.execute(utils.CREATE_LOAD_TABLE_QUERY.format(load_table)) cur.execute('CREATE EXTENSION IF NOT EXISTS "uuid-ossp" WITH SCHEMA public;') - cur.execute(utils.CREATE_IMAGE_TABLE_QUERY.format(image_table)) - cur.execute(utils.UNIQUE_CONDITION_QUERY.format(table=image_table)) + cur.execute(utils.CREATE_IMAGE_TABLE_QUERY.format(sql_info.media_table)) + cur.execute(utils.UNIQUE_CONDITION_QUERY.format(table=sql_info.media_table)) 
conn.commit() @@ -142,7 +121,7 @@ def test_create_loading_table_creates_table( postgres, load_table, identifier, mock_pg_hook_task ): postgres_conn_id = POSTGRES_CONN_ID - sql.create_loading_table(postgres_conn_id, identifier) + sql.create_loading_table(postgres_conn_id, identifier, media_type="image") check_query = ( f"SELECT EXISTS (SELECT FROM pg_tables WHERE tablename='{load_table}');" @@ -154,9 +133,9 @@ def test_create_loading_table_creates_table( def test_create_loading_table_errors_if_run_twice_with_same_id(postgres, identifier): postgres_conn_id = POSTGRES_CONN_ID - sql.create_loading_table(postgres_conn_id, identifier) + sql.create_loading_table(postgres_conn_id, identifier, media_type="image") with pytest.raises(Exception): - sql.create_loading_table(postgres_conn_id, identifier) + sql.create_loading_table(postgres_conn_id, identifier, media_type="image") @flaky @@ -421,6 +400,7 @@ def test_upsert_records_inserts_one_record_to_empty_image_table( tmpdir, load_table, image_table, + sql_info, identifier, mock_pg_hook_task, ): @@ -470,10 +450,21 @@ def test_upsert_records_inserts_one_record_to_empty_image_table( load_data_query = f"""INSERT INTO {load_table} VALUES( {query_values} );""" - postgres_with_load_and_image_table.cursor.execute(load_data_query) - postgres_with_load_and_image_table.connection.commit() + + _set_up_std_popularity_func( + postgres_with_load_and_image_table, + load_data_query, + {}, + sql_info, + mock_pg_hook_task, + ) + sql.upsert_records_to_db_table( - postgres_conn_id, identifier, db_table=image_table, task=mock_pg_hook_task + postgres_conn_id, + identifier, + media_type="image", + sql_info=sql_info, + task=mock_pg_hook_task, ) postgres_with_load_and_image_table.cursor.execute(f"SELECT * FROM {image_table};") actual_rows = postgres_with_load_and_image_table.cursor.fetchall() @@ -504,6 +495,7 @@ def test_upsert_records_inserts_two_records_to_image_table( tmpdir, load_table, image_table, + sql_info, identifier, mock_pg_hook_task, ): @@ 
-534,8 +526,21 @@ def test_upsert_records_inserts_two_records_to_image_table( );""" postgres_with_load_and_image_table.cursor.execute(load_data_query) postgres_with_load_and_image_table.connection.commit() + + _set_up_std_popularity_func( + postgres_with_load_and_image_table, + None, + {}, + sql_info, + mock_pg_hook_task, + ) + sql.upsert_records_to_db_table( - postgres_conn_id, identifier, db_table=image_table, task=mock_pg_hook_task + postgres_conn_id, + identifier, + media_type="image", + sql_info=sql_info, + task=mock_pg_hook_task, ) postgres_with_load_and_image_table.cursor.execute(f"SELECT * FROM {image_table};") actual_rows = postgres_with_load_and_image_table.cursor.fetchall() @@ -548,6 +553,7 @@ def test_upsert_records_replaces_updated_on_and_last_synced_with_source( tmpdir, load_table, image_table, + sql_info, identifier, mock_pg_hook_task, ): @@ -567,11 +573,21 @@ def test_upsert_records_replaces_updated_on_and_last_synced_with_source( '{FID}','{LAND_URL}','{IMG_URL}','{LICENSE}','{VERSION}', '{PROVIDER}','{PROVIDER}' );""" - postgres_with_load_and_image_table.cursor.execute(load_data_query) - postgres_with_load_and_image_table.connection.commit() + + _set_up_std_popularity_func( + postgres_with_load_and_image_table, + load_data_query, + {}, + sql_info, + mock_pg_hook_task, + ) sql.upsert_records_to_db_table( - postgres_conn_id, identifier, db_table=image_table, task=mock_pg_hook_task + postgres_conn_id, + identifier, + media_type="image", + sql_info=sql_info, + task=mock_pg_hook_task, ) postgres_with_load_and_image_table.cursor.execute(f"SELECT * FROM {image_table};") original_row = postgres_with_load_and_image_table.cursor.fetchall() @@ -585,7 +601,11 @@ def test_upsert_records_replaces_updated_on_and_last_synced_with_source( time.sleep(0.5) sql.upsert_records_to_db_table( - postgres_conn_id, identifier, db_table=image_table, task=mock_pg_hook_task + postgres_conn_id, + identifier, + media_type="image", + sql_info=sql_info, + task=mock_pg_hook_task, ) 
postgres_with_load_and_image_table.cursor.execute(f"SELECT * FROM {image_table};") updated_result = postgres_with_load_and_image_table.cursor.fetchall() @@ -606,6 +626,7 @@ def test_upsert_records_replaces_data( tmpdir, load_table, image_table, + sql_info, identifier, mock_pg_hook_task, ): @@ -668,10 +689,20 @@ def test_upsert_records_replaces_data( load_data_query_a = f"""INSERT INTO {load_table} VALUES( {query_values} );""" - postgres_with_load_and_image_table.cursor.execute(load_data_query_a) - postgres_with_load_and_image_table.connection.commit() + _set_up_std_popularity_func( + postgres_with_load_and_image_table, + load_data_query_a, + {}, + sql_info, + mock_pg_hook_task, + ) + sql.upsert_records_to_db_table( - postgres_conn_id, identifier, db_table=image_table, task=mock_pg_hook_task + postgres_conn_id, + identifier, + media_type="image", + sql_info=sql_info, + task=mock_pg_hook_task, ) postgres_with_load_and_image_table.connection.commit() @@ -705,7 +736,11 @@ def test_upsert_records_replaces_data( postgres_with_load_and_image_table.cursor.execute(load_data_query_b) postgres_with_load_and_image_table.connection.commit() sql.upsert_records_to_db_table( - postgres_conn_id, identifier, db_table=image_table, task=mock_pg_hook_task + postgres_conn_id, + identifier, + media_type="image", + sql_info=sql_info, + task=mock_pg_hook_task, ) postgres_with_load_and_image_table.connection.commit() postgres_with_load_and_image_table.cursor.execute(f"SELECT * FROM {image_table};") @@ -730,6 +765,7 @@ def test_upsert_records_does_not_replace_with_nulls( tmpdir, load_table, image_table, + sql_info, identifier, mock_pg_hook_task, ): @@ -784,10 +820,20 @@ def test_upsert_records_does_not_replace_with_nulls( load_data_query_a = f"""INSERT INTO {load_table} VALUES( {query_values_a} );""" - postgres_with_load_and_image_table.cursor.execute(load_data_query_a) - postgres_with_load_and_image_table.connection.commit() + _set_up_std_popularity_func( + 
postgres_with_load_and_image_table, + load_data_query_a, + {}, + sql_info, + mock_pg_hook_task, + ) + sql.upsert_records_to_db_table( - postgres_conn_id, identifier, db_table=image_table, task=mock_pg_hook_task + postgres_conn_id, + identifier, + media_type="image", + sql_info=sql_info, + task=mock_pg_hook_task, ) postgres_with_load_and_image_table.connection.commit() @@ -811,7 +857,11 @@ def test_upsert_records_does_not_replace_with_nulls( postgres_with_load_and_image_table.cursor.execute(load_data_query_b) postgres_with_load_and_image_table.connection.commit() sql.upsert_records_to_db_table( - postgres_conn_id, identifier, db_table=image_table, task=mock_pg_hook_task + postgres_conn_id, + identifier, + media_type="image", + sql_info=sql_info, + task=mock_pg_hook_task, ) postgres_with_load_and_image_table.connection.commit() postgres_with_load_and_image_table.cursor.execute(f"SELECT * FROM {image_table};") @@ -837,6 +887,7 @@ def test_upsert_records_merges_meta_data( tmpdir, load_table, image_table, + sql_info, identifier, mock_pg_hook_task, ): @@ -875,10 +926,20 @@ def test_upsert_records_merges_meta_data( load_data_query_b = f"""INSERT INTO {load_table} VALUES( {query_values_b} );""" - postgres_with_load_and_image_table.cursor.execute(load_data_query_a) - postgres_with_load_and_image_table.connection.commit() + _set_up_std_popularity_func( + postgres_with_load_and_image_table, + load_data_query_a, + {}, + sql_info, + mock_pg_hook_task, + ) + sql.upsert_records_to_db_table( - postgres_conn_id, identifier, db_table=image_table, task=mock_pg_hook_task + postgres_conn_id, + identifier, + media_type="image", + sql_info=sql_info, + task=mock_pg_hook_task, ) postgres_with_load_and_image_table.connection.commit() postgres_with_load_and_image_table.cursor.execute(f"DELETE FROM {load_table};") @@ -886,7 +947,11 @@ def test_upsert_records_merges_meta_data( postgres_with_load_and_image_table.cursor.execute(load_data_query_b) 
postgres_with_load_and_image_table.connection.commit() sql.upsert_records_to_db_table( - postgres_conn_id, identifier, db_table=image_table, task=mock_pg_hook_task + postgres_conn_id, + identifier, + media_type="image", + sql_info=sql_info, + task=mock_pg_hook_task, ) postgres_with_load_and_image_table.connection.commit() postgres_with_load_and_image_table.cursor.execute(f"SELECT * FROM {image_table};") @@ -903,6 +968,7 @@ def test_upsert_records_does_not_replace_with_null_values_in_meta_data( tmpdir, load_table, image_table, + sql_info, identifier, mock_pg_hook_task, ): @@ -941,10 +1007,20 @@ def test_upsert_records_does_not_replace_with_null_values_in_meta_data( load_data_query_b = f"""INSERT INTO {load_table} VALUES( {query_values_b} );""" - postgres_with_load_and_image_table.cursor.execute(load_data_query_a) - postgres_with_load_and_image_table.connection.commit() + _set_up_std_popularity_func( + postgres_with_load_and_image_table, + load_data_query_a, + {}, + sql_info, + mock_pg_hook_task, + ) + sql.upsert_records_to_db_table( - postgres_conn_id, identifier, db_table=image_table, task=mock_pg_hook_task + postgres_conn_id, + identifier, + media_type="image", + sql_info=sql_info, + task=mock_pg_hook_task, ) postgres_with_load_and_image_table.connection.commit() postgres_with_load_and_image_table.cursor.execute(f"DELETE FROM {load_table};") @@ -952,7 +1028,11 @@ def test_upsert_records_does_not_replace_with_null_values_in_meta_data( postgres_with_load_and_image_table.cursor.execute(load_data_query_b) postgres_with_load_and_image_table.connection.commit() sql.upsert_records_to_db_table( - postgres_conn_id, identifier, db_table=image_table, task=mock_pg_hook_task + postgres_conn_id, + identifier, + media_type="image", + sql_info=sql_info, + task=mock_pg_hook_task, ) postgres_with_load_and_image_table.connection.commit() postgres_with_load_and_image_table.cursor.execute(f"SELECT * FROM {image_table};") @@ -971,6 +1051,7 @@ def test_upsert_records_merges_tags( 
tmpdir, load_table, image_table, + sql_info, identifier, mock_pg_hook_task, ): @@ -1016,10 +1097,19 @@ def test_upsert_records_merges_tags( load_data_query_b = f"""INSERT INTO {load_table} VALUES( {query_values_b} );""" - postgres_with_load_and_image_table.cursor.execute(load_data_query_a) - postgres_with_load_and_image_table.connection.commit() + _set_up_std_popularity_func( + postgres_with_load_and_image_table, + load_data_query_a, + {}, + sql_info, + mock_pg_hook_task, + ) sql.upsert_records_to_db_table( - postgres_conn_id, identifier, db_table=image_table, task=mock_pg_hook_task + postgres_conn_id, + identifier, + media_type="image", + sql_info=sql_info, + task=mock_pg_hook_task, ) postgres_with_load_and_image_table.connection.commit() postgres_with_load_and_image_table.cursor.execute(f"DELETE FROM {load_table};") @@ -1027,7 +1117,11 @@ def test_upsert_records_merges_tags( postgres_with_load_and_image_table.cursor.execute(load_data_query_b) postgres_with_load_and_image_table.connection.commit() sql.upsert_records_to_db_table( - postgres_conn_id, identifier, db_table=image_table, task=mock_pg_hook_task + postgres_conn_id, + identifier, + media_type="image", + sql_info=sql_info, + task=mock_pg_hook_task, ) postgres_with_load_and_image_table.connection.commit() postgres_with_load_and_image_table.cursor.execute(f"SELECT * FROM {image_table};") @@ -1050,6 +1144,7 @@ def test_upsert_records_does_not_replace_tags_with_null( tmpdir, load_table, image_table, + sql_info, identifier, mock_pg_hook_task, ): @@ -1089,10 +1184,19 @@ def test_upsert_records_does_not_replace_tags_with_null( load_data_query_b = f"""INSERT INTO {load_table} VALUES( {query_values_b} );""" - postgres_with_load_and_image_table.cursor.execute(load_data_query_a) - postgres_with_load_and_image_table.connection.commit() + _set_up_std_popularity_func( + postgres_with_load_and_image_table, + load_data_query_a, + {}, + sql_info, + mock_pg_hook_task, + ) sql.upsert_records_to_db_table( - postgres_conn_id, 
identifier, db_table=image_table, task=mock_pg_hook_task + postgres_conn_id, + identifier, + media_type="image", + sql_info=sql_info, + task=mock_pg_hook_task, ) postgres_with_load_and_image_table.connection.commit() postgres_with_load_and_image_table.cursor.execute(f"DELETE FROM {load_table};") @@ -1100,7 +1204,11 @@ def test_upsert_records_does_not_replace_tags_with_null( postgres_with_load_and_image_table.cursor.execute(load_data_query_b) postgres_with_load_and_image_table.connection.commit() sql.upsert_records_to_db_table( - postgres_conn_id, identifier, db_table=image_table, task=mock_pg_hook_task + postgres_conn_id, + identifier, + media_type="image", + sql_info=sql_info, + task=mock_pg_hook_task, ) postgres_with_load_and_image_table.connection.commit() postgres_with_load_and_image_table.cursor.execute(f"SELECT * FROM {image_table};") @@ -1122,6 +1230,7 @@ def test_upsert_records_replaces_null_tags( tmpdir, load_table, image_table, + sql_info, identifier, mock_pg_hook_task, ): @@ -1160,10 +1269,19 @@ def test_upsert_records_replaces_null_tags( {query_values_b} );""" - postgres_with_load_and_image_table.cursor.execute(load_data_query_a) - postgres_with_load_and_image_table.connection.commit() + _set_up_std_popularity_func( + postgres_with_load_and_image_table, + load_data_query_a, + {}, + sql_info, + mock_pg_hook_task, + ) sql.upsert_records_to_db_table( - postgres_conn_id, identifier, db_table=image_table, task=mock_pg_hook_task + postgres_conn_id, + identifier, + media_type="image", + sql_info=sql_info, + task=mock_pg_hook_task, ) postgres_with_load_and_image_table.connection.commit() postgres_with_load_and_image_table.cursor.execute(f"DELETE FROM {load_table};") @@ -1171,7 +1289,11 @@ def test_upsert_records_replaces_null_tags( postgres_with_load_and_image_table.cursor.execute(load_data_query_b) postgres_with_load_and_image_table.connection.commit() sql.upsert_records_to_db_table( - postgres_conn_id, identifier, db_table=image_table, task=mock_pg_hook_task 
+ postgres_conn_id, + identifier, + media_type="image", + sql_info=sql_info, + task=mock_pg_hook_task, ) postgres_with_load_and_image_table.connection.commit() postgres_with_load_and_image_table.cursor.execute(f"SELECT * FROM {image_table};") @@ -1194,6 +1316,7 @@ def test_upsert_records_handles_duplicate_url_and_does_not_merge( tmpdir, load_table, image_table, + sql_info, identifier, mock_pg_hook_task, ): @@ -1238,10 +1361,19 @@ def test_upsert_records_handles_duplicate_url_and_does_not_merge( # Simulate a DAG run where A is ingested into the loading table, upserted into # the image table, and finally the loading table is cleared for the next DAG run. - postgres_with_load_and_image_table.cursor.execute(load_data_query_a) - postgres_with_load_and_image_table.connection.commit() + _set_up_std_popularity_func( + postgres_with_load_and_image_table, + load_data_query_a, + {}, + sql_info, + mock_pg_hook_task, + ) sql.upsert_records_to_db_table( - postgres_conn_id, identifier, db_table=image_table, task=mock_pg_hook_task + postgres_conn_id, + identifier, + media_type="image", + sql_info=sql_info, + task=mock_pg_hook_task, ) postgres_with_load_and_image_table.connection.commit() postgres_with_load_and_image_table.cursor.execute(f"DELETE FROM {load_table};") @@ -1252,7 +1384,11 @@ def test_upsert_records_handles_duplicate_url_and_does_not_merge( postgres_with_load_and_image_table.cursor.execute(load_data_query_b) postgres_with_load_and_image_table.connection.commit() sql.upsert_records_to_db_table( - postgres_conn_id, identifier, db_table=image_table, task=mock_pg_hook_task + postgres_conn_id, + identifier, + media_type="image", + sql_info=sql_info, + task=mock_pg_hook_task, ) postgres_with_load_and_image_table.connection.commit() @@ -1274,6 +1410,7 @@ def test_upsert_records_handles_duplicate_urls_in_a_single_batch_and_does_not_me tmpdir, load_table, image_table, + sql_info, identifier, mock_pg_hook_task, ): @@ -1345,9 +1482,20 @@ def 
test_upsert_records_handles_duplicate_urls_in_a_single_batch_and_does_not_me rows = postgres_with_load_and_image_table.cursor.fetchall() assert len(rows) == 3 + _set_up_std_popularity_func( + postgres_with_load_and_image_table, + None, + {}, + sql_info, + mock_pg_hook_task, + ) # Now try upserting the records from the loading table to the final image table. sql.upsert_records_to_db_table( - postgres_conn_id, identifier, db_table=image_table, task=mock_pg_hook_task + postgres_conn_id, + identifier, + media_type="image", + sql_info=sql_info, + task=mock_pg_hook_task, ) postgres_with_load_and_image_table.connection.commit() @@ -1370,7 +1518,7 @@ def test_upsert_records_calculates_standardized_popularity( load_table, image_table, identifier, - table_info, + sql_info, mock_pg_hook_task, ): postgres_conn_id = POSTGRES_CONN_ID @@ -1407,17 +1555,17 @@ def test_upsert_records_calculates_standardized_popularity( PROVIDER: {"metric": "views", "percentile": 0.8}, } - # Now we set up the popularity constants tables, views, and functions. This method will - # run the `data_query` to insert our test rows, which will initially have `null` standardized + # Now we re-set up the popularity constants tables, views, and functions after running + # the `data_query` to insert our test rows, which will initially have `null` standardized # popularity (because no popularity constants exist). Then it will insert `metrics` into - # the `image_popularity_metrics` table, and create the `image_popularity_constants` view, - # calculating a value for the popularity constant for PROVIDER using those initial records. + # the `image_popularity_metrics` table, and calculate a value for the popularity constant + # for PROVIDER using those initial records. # Then it sets up the standardized popularity function itself. 
_set_up_std_popularity_func( postgres_with_load_and_image_table, data_query, metrics, - table_info, + sql_info, mock_pg_hook_task, ) @@ -1471,8 +1619,8 @@ def test_upsert_records_calculates_standardized_popularity( sql.upsert_records_to_db_table( postgres_conn_id, identifier, - db_table=image_table, - popularity_function=table_info.standardized_popularity, + media_type="image", + sql_info=sql_info, task=mock_pg_hook_task, ) postgres_with_load_and_image_table.connection.commit() diff --git a/catalog/tests/dags/common/popularity/__init__.py b/catalog/tests/dags/common/popularity/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/catalog/tests/dags/common/popularity/test_dag_factory.py b/catalog/tests/dags/common/popularity/test_dag_factory.py deleted file mode 100644 index ba173a24dfe..00000000000 --- a/catalog/tests/dags/common/popularity/test_dag_factory.py +++ /dev/null @@ -1,101 +0,0 @@ -from datetime import timedelta -from unittest import mock - -import pytest -from airflow.models import DagRun -from airflow.models.dag import DAG -from airflow.utils.session import create_session -from airflow.utils.timezone import datetime -from airflow.utils.types import DagRunType -from popularity.dag_factory import get_providers_update_confs -from popularity.popularity_refresh_types import PopularityRefresh - -from catalog.tests.test_utils.sql import POSTGRES_CONN_ID - - -TEST_DAG_ID = "popularity_refresh_dag_factory_test_dag" -TEST_DAG = DAG(TEST_DAG_ID, default_args={"owner": "airflow"}) -TEST_DAY = datetime(2023, 1, 1) - - -@pytest.fixture(autouse=True) -def clean_db(): - with create_session() as session: - session.query(DagRun).filter(DagRun.dag_id == TEST_DAG_ID).delete() - - -def _create_dagrun(start_date, dag_state, conf={}): - return TEST_DAG.create_dagrun( - start_date=start_date, - execution_date=start_date, - data_interval=(start_date, start_date), - state=dag_state, - run_type=DagRunType.MANUAL, - conf=conf, - ) - - 
-@pytest.mark.parametrize( - "providers, media_type, expected_confs", - [ - # No providers for this media type - ([], "image", []), - ( - ["foo_provider"], - "image", - [ - { - "query_id": "foo_provider_popularity_refresh_20230101", - "table_name": "image", - "select_query": "WHERE provider='foo_provider' AND updated_on < '2023-01-01 00:00:00'", - "update_query": "SET standardized_popularity = standardized_image_popularity(image.provider, image.meta_data)", - "batch_size": 10000, - "update_timeout": 3600.0, - "dry_run": False, - "resume_update": False, - }, - ], - ), - ( - ["my_provider", "your_provider"], - "audio", - [ - { - "query_id": "my_provider_popularity_refresh_20230101", - "table_name": "audio", - "select_query": "WHERE provider='my_provider' AND updated_on < '2023-01-01 00:00:00'", - "update_query": "SET standardized_popularity = standardized_audio_popularity(audio.provider, audio.meta_data)", - "batch_size": 10000, - "update_timeout": 3600.0, - "dry_run": False, - "resume_update": False, - }, - { - "query_id": "your_provider_popularity_refresh_20230101", - "table_name": "audio", - "select_query": "WHERE provider='your_provider' AND updated_on < '2023-01-01 00:00:00'", - "update_query": "SET standardized_popularity = standardized_audio_popularity(audio.provider, audio.meta_data)", - "batch_size": 10000, - "update_timeout": 3600.0, - "dry_run": False, - "resume_update": False, - }, - ], - ), - ], -) -def test_get_providers_update_confs(providers, media_type, expected_confs): - with mock.patch( - "common.popularity.sql.get_providers_with_popularity_data_for_media_type", - return_value=providers, - ): - actual_confs = get_providers_update_confs.function( - POSTGRES_CONN_ID, - PopularityRefresh( - media_type=media_type, - refresh_popularity_batch_timeout=timedelta(hours=1), - ), - TEST_DAY, - ) - - assert actual_confs == expected_confs diff --git a/catalog/tests/dags/common/popularity/test_resources/mock_popularity_dump.tsv 
b/catalog/tests/dags/common/popularity/test_resources/mock_popularity_dump.tsv deleted file mode 100644 index b2e3a7cf65c..00000000000 --- a/catalog/tests/dags/common/popularity/test_resources/mock_popularity_dump.tsv +++ /dev/null @@ -1,4 +0,0 @@ -identifier provider global_usage_count views -00000000-0000-0000-0000-000000000001 foo 500 -00000000-0000-0000-0000-000000000002 foo 10 -00000000-0000-0000-0000-000000000003 foo 10 500 diff --git a/catalog/tests/dags/common/test_utils.py b/catalog/tests/dags/common/test_utils.py new file mode 100644 index 00000000000..aec2401e1fc --- /dev/null +++ b/catalog/tests/dags/common/test_utils.py @@ -0,0 +1,101 @@ +import pytest + +from common.utils import setup_kwargs_for_media_type + + +TEST_VALS_BY_MEDIA_TYPE = {"audio": "foo", "image": "bar"} +p = pytest.param + + +@pytest.mark.parametrize( + "media_type, my_param, expected_param", + ( + ("audio", None, "foo"), + ("image", None, "bar"), + # Pass in an explicit value for my_param; this should be returned + p( + "audio", + "hello world", + "hello world", + id="explicitly passed value should be returned", + ), + p( + "foo", + "hello world", + "hello world", + id="explicitly passed value is returned, even if the values dict does not have a key for the media type", + ), + # No media type + p( + None, + None, + None, + marks=pytest.mark.raises(exception=ValueError), + id="raises error when no media type passed", + ), + p( + "foo", + None, + None, + marks=pytest.mark.raises(exception=ValueError), + id="raises error when no matching key in values dict", + ), + ), +) +def test_setup_kwargs_for_media_type(media_type, my_param, expected_param): + @setup_kwargs_for_media_type(TEST_VALS_BY_MEDIA_TYPE, "my_param") + def test_fn(*, media_type: str, my_param: str = None): + assert my_param == expected_param + + test_fn(media_type=media_type, my_param=my_param) + + +def test_setup_kwargs_for_media_type_creates_new_decorator(): + # Create a new decorator using the factory + new_decorator = 
setup_kwargs_for_media_type(TEST_VALS_BY_MEDIA_TYPE, "new_param") + + # New function decorated with this decorator + @new_decorator + def test_fn(*, media_type: str, new_param: str = None): + return new_param + + assert test_fn(media_type="audio") == "foo" + + +def test_setup_kwargs_for_media_type_fails_without_media_type_kwarg(): + with pytest.raises(Exception, match="Improperly configured"): + # Decorated function does not have a media_type kwarg + @setup_kwargs_for_media_type(TEST_VALS_BY_MEDIA_TYPE, "my_param") + def test_fn(*, my_param: str = None): + pass + + +def test_setup_kwargs_for_media_type_fails_with_media_type_arg(): + with pytest.raises(Exception, match="Improperly configured"): + # Decorate a function that allows media_type to be passed as a keyword + # or as a positional argument + @setup_kwargs_for_media_type(TEST_VALS_BY_MEDIA_TYPE, "my_param") + def test_fn(media_type, my_param: str = None): + pass + + +def test_setup_kwargs_for_media_type_fails_with_var_kwargs(): + with pytest.raises(Exception, match="Improperly configured"): + # Decorate a function that has var kwargs but does not explicitly + # require a keyword-only `media_type` arg + @setup_kwargs_for_media_type(TEST_VALS_BY_MEDIA_TYPE, "my_param") + def test_fn(**kwargs): + pass + + +def test_setup_kwargs_for_media_type_fails_without_kwarg(): + # Decorated function does not have the kwarg we want populated + @setup_kwargs_for_media_type(TEST_VALS_BY_MEDIA_TYPE, "my_param") + def test_fn(*, media_type: str): + pass + + with pytest.raises( + TypeError, + match="got an unexpected keyword argument 'my_param'", + ): + test_fn(media_type="audio") diff --git a/catalog/tests/dags/conftest.py b/catalog/tests/dags/conftest.py index d994ae6729b..a9e53f3b60b 100644 --- a/catalog/tests/dags/conftest.py +++ b/catalog/tests/dags/conftest.py @@ -1,8 +1,14 @@ +from datetime import datetime, timedelta from unittest import mock import pytest +from airflow import DAG +from airflow.models.abstractoperator 
import AbstractOperator +from airflow.operators.python import PythonOperator from requests import Response +from common.constants import POSTGRES_CONN_ID, SQLInfo +from common.sql import PGExecuteQueryOperator, PostgresHook from oauth2 import oauth2 @@ -28,6 +34,104 @@ def _var_get_replacement(*args, **kwargs): return values[args[0]] +@pytest.fixture +def identifier(request): + return f"{hash(request.node.name)}".replace("-", "_") + + +@pytest.fixture +def image_table(identifier): + # Parallelized tests need to use distinct database tables + return f"image_{identifier}" + + +@pytest.fixture +def sql_info( + image_table, + identifier, +) -> SQLInfo: + return SQLInfo( + media_table=image_table, + metrics_table=f"image_popularity_metrics_{identifier}", + standardized_popularity_fn=f"standardized_image_popularity_{identifier}", + popularity_percentile_fn=f"image_popularity_percentile_{identifier}", + ) + + +TEST_SQL = "SELECT PG_SLEEP(1);" + + +def timed_pg_hook_sleeper( + task, + statement_timeout: float = None, +): + pg = PostgresHook( + default_statement_timeout=PostgresHook.get_execution_timeout(task), + conn_id=POSTGRES_CONN_ID, + ) + pg.run(sql=TEST_SQL, statement_timeout=statement_timeout) + + +def mapped_select_pg_hook( + select_val: int, + task: AbstractOperator, +): + pg = PostgresHook( + default_statement_timeout=PostgresHook.get_execution_timeout(task), + conn_id=POSTGRES_CONN_ID, + ) + return pg.run(f"select {select_val};") + + +def create_pg_timeout_tester_dag(): + with DAG( + dag_id="a_pg_timeout_tester", + schedule=None, + doc_md="DAG to test query timeouts in postgres", + start_date=datetime(2023, 1, 1), + ) as dag: + pg_operator_happy = PGExecuteQueryOperator( + task_id="pg_operator_happy", + retries=0, + conn_id=POSTGRES_CONN_ID, + sql=TEST_SQL, + execution_timeout=timedelta(seconds=2), + doc_md="Custom PG operator, with query finished before execution timeout", + ) + pg_hook_happy = PythonOperator( + task_id="pg_hook_happy", + retries=0, + 
python_callable=timed_pg_hook_sleeper, + execution_timeout=timedelta(hours=2), + doc_md="Custom PG hook, with query finished before execution timeout", + ) + pg_hook_no_timeout = PythonOperator( + task_id="pg_hook_no_timeout", + retries=0, + python_callable=timed_pg_hook_sleeper, + doc_md="Custom PG hook, with no execution timeout", + ) + pg_operator_mapped = PythonOperator.partial( + task_id="pg_operator_mapped", + retries=0, + execution_timeout=timedelta(minutes=1), + doc_md="Custom PG operator, mapped to list", + python_callable=mapped_select_pg_hook, + ).expand(op_args=[(1,), (2,)]) + [pg_operator_happy, pg_hook_happy, pg_hook_no_timeout, pg_operator_mapped] + return dag + + +@pytest.fixture(scope="session") +def mock_timeout_dag(): + return create_pg_timeout_tester_dag() + + +@pytest.fixture(scope="session") +def mock_pg_hook_task(mock_timeout_dag) -> PythonOperator: + return mock_timeout_dag.get_task("pg_hook_happy") + + @pytest.fixture def oauth_provider_var_mock(): with mock.patch("oauth2.oauth2.Variable") as MockVariable: diff --git a/catalog/dags/common/popularity/__init__.py b/catalog/tests/dags/popularity/__init__.py similarity index 100% rename from catalog/dags/common/popularity/__init__.py rename to catalog/tests/dags/popularity/__init__.py diff --git a/catalog/tests/dags/popularity/test_popularity_refresh_types.py b/catalog/tests/dags/popularity/test_popularity_refresh_types.py new file mode 100644 index 00000000000..6cece979c9f --- /dev/null +++ b/catalog/tests/dags/popularity/test_popularity_refresh_types.py @@ -0,0 +1,36 @@ +import re +from pathlib import Path + +import pytest +from popularity.popularity_refresh_types import POPULARITY_REFRESH_CONFIGS + + +DDL_DEFINITIONS_PATH = Path(__file__).parents[4] / "docker" / "upstream_db" + + +@pytest.mark.parametrize( + "ddl_filename, metrics", + [ + ( + "0004_openledger_image_view.sql", + POPULARITY_REFRESH_CONFIGS[0].popularity_metrics, + ), + ( + "0007_openledger_audio_view.sql", + 
POPULARITY_REFRESH_CONFIGS[1].popularity_metrics, + ), + ], +) +def test_ddl_matches_definitions(ddl_filename, metrics): + ddl = (DDL_DEFINITIONS_PATH / ddl_filename).read_text() + if not ( + match := re.search( + r"INSERT INTO public.\w+_popularity_metrics.*?;", + ddl, + re.MULTILINE | re.DOTALL, + ) + ): + raise ValueError(f"Could not find insert statement in ddl file {ddl_filename}") + + for provider in metrics: + assert provider in match.group(0) diff --git a/catalog/tests/dags/common/popularity/test_sql.py b/catalog/tests/dags/popularity/test_sql.py similarity index 57% rename from catalog/tests/dags/common/popularity/test_sql.py rename to catalog/tests/dags/popularity/test_sql.py index 12b9285e3e3..3792b22061f 100644 --- a/catalog/tests/dags/common/popularity/test_sql.py +++ b/catalog/tests/dags/popularity/test_sql.py @@ -1,73 +1,58 @@ import os -import re from collections import namedtuple -from pathlib import Path +from datetime import datetime, timedelta from textwrap import dedent -from typing import NamedTuple import psycopg2 import pytest +from popularity import sql +from popularity.popularity_refresh_types import PopularityRefresh from catalog.tests.dags.common.conftest import POSTGRES_TEST_CONN_ID as POSTGRES_CONN_ID +from common.constants import SQLInfo from common.loader.sql import create_column_definitions -from common.popularity import sql from common.storage.db_columns import IMAGE_TABLE_COLUMNS -DDL_DEFINITIONS_PATH = Path(__file__).parents[5] / "docker" / "upstream_db" POSTGRES_TEST_URI = os.getenv("AIRFLOW_CONN_POSTGRES_OPENLEDGER_TESTING") -class TableInfo(NamedTuple): - image: str - image_view: str - metrics: str - standardized_popularity: str - popularity_percentile: str - image_view_idx: str - provider_fid_idx: str - - @pytest.fixture -def table_info( +def sql_info( image_table, identifier, -) -> TableInfo: - return TableInfo( - image=image_table, - image_view=f"image_view_{identifier}", - metrics=f"image_popularity_metrics_{identifier}", 
- standardized_popularity=f"standardized_popularity_{identifier}", - popularity_percentile=f"popularity_percentile_{identifier}", - image_view_idx=f"test_view_id_{identifier}_idx", - provider_fid_idx=f"test_view_provider_fid_{identifier}_idx", +) -> SQLInfo: + return SQLInfo( + media_table=image_table, + metrics_table=f"image_popularity_metrics_{identifier}", + standardized_popularity_fn=f"standardized_image_popularity_{identifier}", + popularity_percentile_fn=f"image_popularity_percentile_{identifier}", ) @pytest.fixture -def postgres_with_image_table(table_info): +def postgres_with_image_table(sql_info): Postgres = namedtuple("Postgres", ["cursor", "connection"]) conn = psycopg2.connect(POSTGRES_TEST_URI) cur = conn.cursor() drop_test_relations_query = f""" - DROP MATERIALIZED VIEW IF EXISTS {table_info.image_view} CASCADE; - DROP TABLE IF EXISTS {table_info.metrics} CASCADE; - DROP TABLE IF EXISTS {table_info.image} CASCADE; - DROP FUNCTION IF EXISTS {table_info.standardized_popularity} CASCADE; - DROP FUNCTION IF EXISTS {table_info.popularity_percentile} CASCADE; + DROP TABLE IF EXISTS {sql_info.metrics_table} CASCADE; + DROP TABLE IF EXISTS {sql_info.media_table} CASCADE; + DROP FUNCTION IF EXISTS {sql_info.standardized_popularity_fn} CASCADE; + DROP FUNCTION IF EXISTS {sql_info.popularity_percentile_fn} CASCADE; """ cur.execute(drop_test_relations_query) cur.execute('CREATE EXTENSION IF NOT EXISTS "uuid-ossp" WITH SCHEMA public;') image_columns = create_column_definitions(IMAGE_TABLE_COLUMNS) - cur.execute(f"CREATE TABLE public.{table_info.image} ({image_columns});") + cur.execute(f"CREATE TABLE public.{sql_info.media_table} ({image_columns});") cur.execute( f""" -CREATE UNIQUE INDEX {table_info.image}_provider_fid_idx -ON public.{table_info.image} +CREATE UNIQUE INDEX {sql_info.media_table}_provider_fid_idx +ON public.{sql_info.media_table} USING btree (provider, md5(foreign_identifier)); """ ) @@ -82,20 +67,21 @@ def postgres_with_image_table(table_info): 
conn.close() -def _set_up_popularity_metrics(metrics_dict, table_info, mock_pg_hook_task): +def _set_up_popularity_metrics(metrics_dict, sql_info, mock_pg_hook_task): conn_id = POSTGRES_CONN_ID # Create metrics table - sql.create_media_popularity_metrics( - postgres_conn_id=conn_id, - popularity_metrics_table=table_info.metrics, + sql.create_media_popularity_metrics.function( + postgres_conn_id=conn_id, media_type="image", sql_info=sql_info ) # Insert values from metrics_dict into metrics table - sql.update_media_popularity_metrics.function( - postgres_conn_id=conn_id, - popularity_metrics=metrics_dict, - popularity_metrics_table=table_info.metrics, - task=mock_pg_hook_task, - ) + if metrics_dict: + sql.update_media_popularity_metrics.function( + postgres_conn_id=conn_id, + media_type="image", + popularity_metrics=metrics_dict, + sql_info=sql_info, + task=mock_pg_hook_task, + ) # For each provider in metrics_dict, calculate the percentile and then # update the percentile and popularity constant @@ -103,95 +89,68 @@ def _set_up_popularity_metrics(metrics_dict, table_info, mock_pg_hook_task): percentile_val = sql.calculate_media_popularity_percentile_value.function( postgres_conn_id=conn_id, provider=provider, + media_type="image", task=mock_pg_hook_task, - popularity_metrics_table=table_info.metrics, - popularity_percentile=table_info.popularity_percentile, + sql_info=sql_info, ) sql.update_percentile_and_constants_values_for_provider.function( postgres_conn_id=conn_id, provider=provider, raw_percentile_value=percentile_val, - popularity_metrics_table=table_info.metrics, + media_type="image", popularity_metrics=metrics_dict, + sql_info=sql_info, ) -def _set_up_popularity_percentile_function(table_info): +def _set_up_popularity_percentile_function(sql_info): conn_id = POSTGRES_CONN_ID - sql.create_media_popularity_percentile_function( - conn_id, - popularity_percentile=table_info.popularity_percentile, - media_table=table_info.image, + 
sql.create_media_popularity_percentile_function.function( + conn_id, media_type="image", sql_info=sql_info ) -def _set_up_popularity_constants( +def _set_up_popularity_metrics_and_constants( pg, data_query, metrics_dict, - table_info, + sql_info, mock_pg_hook_task, ): # Execute the data query first (typically, loads sample data into the media table) - pg.cursor.execute(data_query) - pg.connection.commit() + if data_query: + pg.cursor.execute(data_query) + pg.connection.commit() # Then set up functions, metrics, and constants - _set_up_popularity_percentile_function(table_info) - _set_up_popularity_metrics(metrics_dict, table_info, mock_pg_hook_task) + _set_up_popularity_percentile_function(sql_info) + _set_up_popularity_metrics(metrics_dict, sql_info, mock_pg_hook_task) def _set_up_std_popularity_func( pg, data_query, metrics_dict, - table_info, + sql_info, mock_pg_hook_task, ): conn_id = POSTGRES_CONN_ID - _set_up_popularity_constants( + _set_up_popularity_metrics_and_constants( pg, data_query, metrics_dict, - table_info, - mock_pg_hook_task, - ) - sql.create_standardized_media_popularity_function( - conn_id, + sql_info, mock_pg_hook_task, - function_name=table_info.standardized_popularity, - popularity_metrics=table_info.metrics, - ) - - -def _set_up_image_view( - pg, - data_query, - metrics_dict, - table_info, - mock_pg_hook_task, -): - conn_id = POSTGRES_CONN_ID - _set_up_std_popularity_func( - pg, data_query, metrics_dict, table_info, mock_pg_hook_task ) - sql.create_media_view( - conn_id, - standardized_popularity_func=table_info.standardized_popularity, - table_name=table_info.image, - db_view_name=table_info.image_view, - db_view_id_idx=table_info.image_view_idx, - db_view_provider_fid_idx=table_info.provider_fid_idx, - task=mock_pg_hook_task, + sql.create_standardized_media_popularity_function.function( + conn_id, media_type="image", sql_info=sql_info ) -def test_popularity_percentile_function_calculates( - postgres_with_image_table, table_info -): +def 
test_popularity_percentile_function_calculates(postgres_with_image_table, sql_info): data_query = dedent( f""" - INSERT INTO {table_info.image} ( + INSERT INTO {sql_info.media_table} ( created_on, updated_on, provider, foreign_identifier, url, meta_data, license, removed_from_source ) @@ -225,10 +184,10 @@ def test_popularity_percentile_function_calculates( ) postgres_with_image_table.cursor.execute(data_query) postgres_with_image_table.connection.commit() - _set_up_popularity_percentile_function(table_info) + _set_up_popularity_percentile_function(sql_info) mp_perc_1 = dedent( f""" - SELECT {table_info.popularity_percentile}('my_provider', 'views', 0.5); + SELECT {sql_info.popularity_percentile_fn}('my_provider', 'views', 0.5); """ ) postgres_with_image_table.cursor.execute(mp_perc_1) @@ -237,7 +196,7 @@ def test_popularity_percentile_function_calculates( assert actual_percentile_val == expect_percentile_val mp_perc_2 = dedent( f""" - SELECT {table_info.popularity_percentile}('diff_provider', 'comments', 0.3); + SELECT {sql_info.popularity_percentile_fn}('diff_provider', 'comments', 0.3); """ ) postgres_with_image_table.cursor.execute(mp_perc_2) @@ -247,11 +206,11 @@ def test_popularity_percentile_function_calculates( def test_popularity_percentile_function_nones_when_missing_type( - postgres_with_image_table, table_info + postgres_with_image_table, sql_info ): data_query = dedent( f""" - INSERT INTO {table_info.image} ( + INSERT INTO {sql_info.media_table} ( created_on, updated_on, provider, foreign_identifier, url, meta_data, license, removed_from_source ) @@ -269,10 +228,10 @@ def test_popularity_percentile_function_nones_when_missing_type( ) postgres_with_image_table.cursor.execute(data_query) postgres_with_image_table.connection.commit() - _set_up_popularity_percentile_function(table_info) + _set_up_popularity_percentile_function(sql_info) mp_perc_3 = dedent( f""" - SELECT {table_info.popularity_percentile}('diff_provider', 'views', 0.3); + SELECT 
{sql_info.popularity_percentile_fn}('diff_provider', 'views', 0.3); """ ) postgres_with_image_table.cursor.execute(mp_perc_3) @@ -281,11 +240,11 @@ def test_popularity_percentile_function_nones_when_missing_type( def test_metrics_table_adds_values_and_constants( - postgres_with_image_table, table_info, mock_pg_hook_task + postgres_with_image_table, sql_info, mock_pg_hook_task ): data_query = dedent( f""" - INSERT INTO {table_info.image} ( + INSERT INTO {sql_info.media_table} ( created_on, updated_on, provider, foreign_identifier, url, meta_data, license, removed_from_source ) @@ -321,11 +280,11 @@ def test_metrics_table_adds_values_and_constants( "my_provider": {"metric": "views", "percentile": 0.5}, "diff_provider": {"metric": "comments", "percentile": 0.8}, } - _set_up_popularity_constants( - postgres_with_image_table, data_query, metrics, table_info, mock_pg_hook_task + _set_up_popularity_metrics_and_constants( + postgres_with_image_table, data_query, metrics, sql_info, mock_pg_hook_task ) - check_query = f"SELECT * FROM {table_info.metrics};" + check_query = f"SELECT * FROM {sql_info.metrics_table};" postgres_with_image_table.cursor.execute(check_query) expect_rows = [ ("diff_provider", "comments", 0.8, 50.0, 12.5), @@ -337,11 +296,11 @@ def test_metrics_table_adds_values_and_constants( def test_metrics_table_handles_zeros_and_missing_in_constants( - postgres_with_image_table, table_info, mock_pg_hook_task + postgres_with_image_table, sql_info, mock_pg_hook_task ): data_query = dedent( f""" - INSERT INTO {table_info.image} ( + INSERT INTO {sql_info.media_table} ( created_on, updated_on, provider, foreign_identifier, url, meta_data, license, removed_from_source ) @@ -379,11 +338,11 @@ def test_metrics_table_handles_zeros_and_missing_in_constants( # Provider that has a metric configured, but no records with data for that metric "diff_provider": {"metric": "comments", "percentile": 0.8}, } - _set_up_popularity_constants( - postgres_with_image_table, data_query, 
metrics, table_info, mock_pg_hook_task + _set_up_popularity_metrics_and_constants( + postgres_with_image_table, data_query, metrics, sql_info, mock_pg_hook_task ) - check_query = f"SELECT * FROM {table_info.metrics};" + check_query = f"SELECT * FROM {sql_info.metrics_table};" postgres_with_image_table.cursor.execute(check_query) expect_rows = [ ("diff_provider", "comments", 0.8, None, None), @@ -394,53 +353,12 @@ def test_metrics_table_handles_zeros_and_missing_in_constants( assert expect_row == pytest.approx(sorted_row) -def test_get_providers_with_popularity_data_for_media_type( - postgres_with_image_table, table_info, mock_pg_hook_task -): - data_query = dedent( - f""" - INSERT INTO {table_info.image} ( - created_on, updated_on, provider, foreign_identifier, url, - meta_data, license, removed_from_source - ) - VALUES - ( - NOW(), NOW(), 'my_provider', 'fid_a', 'https://test.com/a.jpg', - '{{"views": 0, "description": "cats"}}', 'cc0', false - ), - ( - NOW(), NOW(), 'diff_provider', 'fid_b', 'https://test.com/b.jpg', - '{{"views": 50, "description": "cats"}}', 'cc0', false - ), - ( - NOW(), NOW(), 'provider_without_popularity', 'fid_b', 'https://test.com/b.jpg', - '{{"views": 50, "description": "cats"}}', 'cc0', false - ) - ; - """ - ) - metrics = { - "my_provider": {"metric": "views", "percentile": 0.8}, - "diff_provider": {"metric": "comments", "percentile": 0.8}, - } - _set_up_popularity_constants( - postgres_with_image_table, data_query, metrics, table_info, mock_pg_hook_task - ) - - expected_providers = ["diff_provider", "my_provider"] - actual_providers = sql.get_providers_with_popularity_data_for_media_type( - POSTGRES_CONN_ID, media_type="image", popularity_metrics=table_info.metrics - ) - - assert actual_providers == expected_providers - - def test_standardized_popularity_function_calculates( - postgres_with_image_table, table_info, mock_pg_hook_task + postgres_with_image_table, sql_info, mock_pg_hook_task ): data_query = dedent( f""" - INSERT INTO 
{table_info.image} ( + INSERT INTO {sql_info.media_table} ( created_on, updated_on, provider, foreign_identifier, url, meta_data, license, removed_from_source ) @@ -466,9 +384,9 @@ def test_standardized_popularity_function_calculates( "other_provider": {"metric": "likes", "percentile": 0.5}, } _set_up_std_popularity_func( - postgres_with_image_table, data_query, metrics, table_info, mock_pg_hook_task + postgres_with_image_table, data_query, metrics, sql_info, mock_pg_hook_task ) - check_query = f"SELECT * FROM {table_info.metrics};" + check_query = f"SELECT * FROM {sql_info.metrics_table};" postgres_with_image_table.cursor.execute(check_query) print(list(postgres_with_image_table.cursor)) arg_list = [ @@ -487,7 +405,7 @@ def test_standardized_popularity_function_calculates( print(arg_list[i]) std_pop_query = dedent( f""" - SELECT {table_info.standardized_popularity}( + SELECT {sql_info.standardized_popularity_fn}( '{arg_list[i][0]}', '{arg_list[i][1]}'::jsonb ); @@ -499,73 +417,67 @@ def test_standardized_popularity_function_calculates( assert actual_std_pop_val == expect_std_pop_val -def test_image_view_calculates_std_pop( - postgres_with_image_table, table_info, mock_pg_hook_task -): - data_query = dedent( - f""" - INSERT INTO {table_info.image} ( - created_on, updated_on, provider, foreign_identifier, url, - meta_data, license, removed_from_source - ) - VALUES - ( - NOW(), NOW(), 'my_provider', 'fid_a', 'https://test.com/a.jpg', - '{{"views": 0, "description": "cats"}}', 'cc0', false - ), - ( - NOW(), NOW(), 'my_provider', 'fid_b', 'https://test.com/b.jpg', - '{{"views": 50, "description": "cats"}}', 'cc0', false - ), - ( - NOW(), NOW(), 'my_provider', 'fid_c', 'https://test.com/c.jpg', - '{{"views": 75, "description": "cats"}}', 'cc0', false - ), - ( - NOW(), NOW(), 'my_provider', 'fid_d', 'https://test.com/d.jpg', - '{{"views": 150, "description": "cats"}}', 'cc0', false - ) - """ - ) - metrics = {"my_provider": {"metric": "views", "percentile": 0.5}} - 
_set_up_image_view( - postgres_with_image_table, data_query, metrics, table_info, mock_pg_hook_task - ) - check_query = dedent( - f""" - SELECT foreign_identifier, standardized_popularity - FROM {table_info.image_view}; - """ - ) - postgres_with_image_table.cursor.execute(check_query) - rd = dict(postgres_with_image_table.cursor) - assert all( - [ - rd["fid_a"] == 0.0, - rd["fid_b"] == 0.5, - rd["fid_c"] == 0.6, - rd["fid_d"] == 0.75, - ] - ) - - @pytest.mark.parametrize( - "ddl_filename, metrics", + "providers, media_type, expected_confs", [ - ("0004_openledger_image_view.sql", sql.IMAGE_POPULARITY_METRICS), - ("0007_openledger_audio_view.sql", sql.AUDIO_POPULARITY_METRICS), + # No providers for this media type + ([], "image", []), + ( + ["foo_provider"], + "image", + [ + { + "query_id": "foo_provider_popularity_refresh_20230101", + "table_name": "image", + "select_query": "WHERE provider='foo_provider' AND updated_on < '2023-01-01 00:00:00'", + "update_query": "SET standardized_popularity = standardized_image_popularity(image.provider, image.meta_data)", + "batch_size": 10000, + "update_timeout": 3600.0, + "dry_run": False, + "resume_update": False, + }, + ], + ), + ( + ["my_provider", "your_provider"], + "audio", + [ + { + "query_id": "my_provider_popularity_refresh_20230101", + "table_name": "audio", + "select_query": "WHERE provider='my_provider' AND updated_on < '2023-01-01 00:00:00'", + "update_query": "SET standardized_popularity = standardized_audio_popularity(audio.provider, audio.meta_data)", + "batch_size": 10000, + "update_timeout": 3600.0, + "dry_run": False, + "resume_update": False, + }, + { + "query_id": "your_provider_popularity_refresh_20230101", + "table_name": "audio", + "select_query": "WHERE provider='your_provider' AND updated_on < '2023-01-01 00:00:00'", + "update_query": "SET standardized_popularity = standardized_audio_popularity(audio.provider, audio.meta_data)", + "batch_size": 10000, + "update_timeout": 3600.0, + "dry_run": False, + 
"resume_update": False, + }, + ], + ), ], ) -def test_ddl_matches_definitions(ddl_filename, metrics): - ddl = (DDL_DEFINITIONS_PATH / ddl_filename).read_text() - if not ( - match := re.search( - r"INSERT INTO public.\w+_popularity_metrics.*?;", - ddl, - re.MULTILINE | re.DOTALL, - ) - ): - raise ValueError(f"Could not find insert statement in ddl file {ddl_filename}") +def test_get_providers_update_confs(providers, media_type, expected_confs): + TEST_DAY = datetime(2023, 1, 1) + config = PopularityRefresh( + media_type=media_type, + refresh_popularity_batch_timeout=timedelta(hours=1), + popularity_metrics={provider: {"metric": "views"} for provider in providers}, + ) + + actual_confs = sql.get_providers_update_confs.function( + POSTGRES_CONN_ID, + config, + TEST_DAY, + ) - for provider in metrics: - assert provider in match.group(0) + assert actual_confs == expected_confs diff --git a/catalog/tests/dags/providers/provider_api_scripts/resources/justtakeitfree/single_item.json b/catalog/tests/dags/providers/provider_api_scripts/resources/justtakeitfree/single_item.json new file mode 100644 index 00000000000..acf3294abb4 --- /dev/null +++ b/catalog/tests/dags/providers/provider_api_scripts/resources/justtakeitfree/single_item.json @@ -0,0 +1,12 @@ +[ + { + "credit_requirements": "This image is published under CC BY 4.0 licence.
Free for commercial use. Attribution required.
Please, credit: © Justtakeitfree Free Photos (CC BY 4.0)
An active hyperlink to the page should be provided.", + "credit_text": "© Justtakeitfree Free Photos (CC BY 4.0)", + "full_image_link": "https://justtakeitfree.com/photos/2.jpg", + "license": "(CC BY 4.0)", + "license_link": "https://creativecommons.org/licenses/by/4.0/deed.en", + "page_link": "https://justtakeitfree.com/photo/2/", + "preview_link": "https://justtakeitfree.com/photos/2_800.jpg", + "tags": ["Baturyn fortress", "Baturyn citadel", "cossack fortress"] + } +] diff --git a/catalog/tests/dags/providers/provider_api_scripts/test_justtakeitfree.py b/catalog/tests/dags/providers/provider_api_scripts/test_justtakeitfree.py new file mode 100644 index 00000000000..fc26e0232c1 --- /dev/null +++ b/catalog/tests/dags/providers/provider_api_scripts/test_justtakeitfree.py @@ -0,0 +1,64 @@ +import json +from pathlib import Path +from unittest.mock import patch + +from common.licenses import get_license_info +from providers.provider_api_scripts.justtakeitfree import JusttakeitfreeDataIngester + + +RESOURCES = Path(__file__).parent / "resources/justtakeitfree" + +jtif = JusttakeitfreeDataIngester() + + +class FileSizeHeadResponse: + def __init__(self, size): + self.headers = {"Content-Length": size} + + +def test_get_next_query_params_default_response(): + actual_result = jtif.get_next_query_params(None) + actual_result.pop("key", None) + expected_result = { + "page": 1, + } + assert actual_result == expected_result + + +def test_get_next_query_params_updates_parameters(): + previous_query_params = { + "page": 1, + } + actual_result = jtif.get_next_query_params(previous_query_params) + actual_result.pop("key", None) + + expected_result = { + "page": 2, + } + assert actual_result == expected_result + + +def test_get_record_data(): + with open(RESOURCES / "single_item.json") as f: + resource_json = json.load(f) + + with patch.object(jtif.delayed_requester, "head") as head_patch: + # Returns None when 404 + head_patch.return_value = FileSizeHeadResponse(100) + actual_data 
= jtif.get_record_data(resource_json) + + expected_data = { + "foreign_landing_url": "https://justtakeitfree.com/photo/2/", + "url": "https://justtakeitfree.com/photos/2.jpg", + "foreign_identifier": "2", + "creator": "Justtakeitfree Free Photos", + "creator_url": "https://justtakeitfree.com", + "license_info": get_license_info( + "https://creativecommons.org/licenses/by/4.0/" + ), + "raw_tags": ["Baturyn fortress", "Baturyn citadel", "cossack fortress"], + "thumbnail_url": "https://justtakeitfree.com/photos/2_800.jpg", + "filesize": 100, + } + + assert actual_data == expected_data diff --git a/catalog/tests/dags/test_dag_parsing.py b/catalog/tests/dags/test_dag_parsing.py index e778f4a1f4e..eb90f9b8774 100644 --- a/catalog/tests/dags/test_dag_parsing.py +++ b/catalog/tests/dags/test_dag_parsing.py @@ -20,7 +20,8 @@ "maintenance/airflow_log_cleanup_workflow.py", "maintenance/pr_review_reminders/pr_review_reminders_dag.py", "maintenance/rotate_db_snapshots.py", - "database/recreate_popularity_calculation_dag_factory.py", + "popularity/recreate_popularity_calculation_dag_factory.py", + "popularity/popularity_refresh_dag_factory.py", "data_refresh/dag_factory.py", "data_refresh/create_filtered_index_dag.py", "oauth2/authorize_dag.py", @@ -34,7 +35,8 @@ "providers/provider_ingestion_workflow_dag_factory.py": len( REINGESTION_WORKFLOW_CONFIGS ), - "database/recreate_popularity_calculation_dag_factory.py": len(MEDIA_TYPES), + "popularity/recreate_popularity_calculation_dag_factory.py": len(MEDIA_TYPES), + "popularity/popularity_refresh_dag_factory.py": len(MEDIA_TYPES), "data_refresh/dag_factory.py": len(MEDIA_TYPES), "data_refresh/create_filtered_index_dag.py": len(MEDIA_TYPES), } diff --git a/catalog/tests/test_utils/sql.py b/catalog/tests/test_utils/sql.py index b58c11a4efb..2072c33f510 100644 --- a/catalog/tests/test_utils/sql.py +++ b/catalog/tests/test_utils/sql.py @@ -4,10 +4,10 @@ from airflow.models import TaskInstance -from common.constants import IMAGE -from 
common.loader.sql import TSV_COLUMNS, create_column_definitions +from common.loader.sql import create_column_definitions from common.storage import columns as col from common.storage.db_columns import IMAGE_TABLE_COLUMNS +from common.storage.tsv_columns import CURRENT_IMAGE_TSV_COLUMNS POSTGRES_CONN_ID = os.getenv("TEST_CONN_ID") @@ -18,7 +18,7 @@ LOADING_TABLE_COLUMN_DEFINITIONS = create_column_definitions( - TSV_COLUMNS[IMAGE], is_loading=True + CURRENT_IMAGE_TSV_COLUMNS, is_loading=True ) CREATE_LOAD_TABLE_QUERY = f"""CREATE TABLE public.{{}} ( @@ -80,7 +80,7 @@ def create_query_values( columns=None, ): if columns is None: - columns = TSV_COLUMNS[IMAGE] + columns = CURRENT_IMAGE_TSV_COLUMNS result = [] for column in columns: val = column_values.get(column.db_name) diff --git a/docker/upstream_db/0004_openledger_image_view.sql b/docker/upstream_db/0004_openledger_image_view.sql index efdf2060749..e3802f01874 100644 --- a/docker/upstream_db/0004_openledger_image_view.sql +++ b/docker/upstream_db/0004_openledger_image_view.sql @@ -42,38 +42,3 @@ $$ LANGUAGE SQL STABLE RETURNS NULL ON NULL INPUT; - - -CREATE MATERIALIZED VIEW image_view AS - SELECT - identifier, - created_on, - updated_on, - ingestion_type, - provider, - source, - foreign_identifier, - foreign_landing_url, - url, - thumbnail, - width, - height, - filesize, - license, - license_version, - creator, - creator_url, - title, - meta_data, - tags, - watermarked, - last_synced_with_source, - removed_from_source, - filetype, - category, - standardized_image_popularity( - image.provider, image.meta_data - ) AS standardized_popularity - FROM image; - -CREATE UNIQUE INDEX ON image_view (identifier); diff --git a/docker/upstream_db/0007_openledger_audio_view.sql b/docker/upstream_db/0007_openledger_audio_view.sql index f73748e7f89..af625f4c261 100644 --- a/docker/upstream_db/0007_openledger_audio_view.sql +++ b/docker/upstream_db/0007_openledger_audio_view.sql @@ -38,46 +38,6 @@ STABLE RETURNS NULL ON NULL 
INPUT; -CREATE MATERIALIZED VIEW audio_view AS - SELECT - identifier, - created_on, - updated_on, - ingestion_type, - provider, - source, - foreign_identifier, - foreign_landing_url, - url, - thumbnail, - filetype, - duration, - bit_rate, - sample_rate, - category, - genres, - audio_set, - alt_files, - filesize, - license, - license_version, - creator, - creator_url, - title, - meta_data, - tags, - watermarked, - last_synced_with_source, - removed_from_source, - audio_set ->> 'foreign_identifier' AS audio_set_foreign_identifier, - standardized_audio_popularity( - audio.provider, audio.meta_data - ) AS standardized_popularity - FROM audio; - -CREATE UNIQUE INDEX ON audio_view (identifier); - - CREATE VIEW audioset_view AS -- DISTINCT clause exists to ensure that only one record is present for a given -- foreign identifier/provider pair. This exists as a hard constraint in the API table diff --git a/documentation/changelogs/api/2023.09.11.16.56.52.md b/documentation/changelogs/api/2023.09.11.16.56.52.md new file mode 100644 index 00000000000..d1d41033505 --- /dev/null +++ b/documentation/changelogs/api/2023.09.11.16.56.52.md @@ -0,0 +1,14 @@ +# 2023.09.11.16.56.52 + +## Internal Improvements + +- remove redlock-py from api dependencies + ([#3004](https://github.com/WordPress/openverse/pull/3004)) by @rwidom +- Remove python3 openid dependency from api + ([#3005](https://github.com/WordPress/openverse/pull/3005)) by @rwidom +- remove hvac dependency from API + ([#3003](https://github.com/WordPress/openverse/pull/3003)) by @rwidom +- Bump deepdiff from 6.3.1 to 6.4.1 in /api + ([#2960](https://github.com/WordPress/openverse/pull/2960)) by @dependabot +- Switch local API dev server to use gunicorn instead of django runserver + ([#2936](https://github.com/WordPress/openverse/pull/2936)) by @ashiramin diff --git a/documentation/changelogs/api/2023.09.15.19.15.58.md b/documentation/changelogs/api/2023.09.15.19.15.58.md new file mode 100644 index 00000000000..c6e54d3eedc 
--- /dev/null +++ b/documentation/changelogs/api/2023.09.15.19.15.58.md @@ -0,0 +1,22 @@ +# 2023.09.15.19.15.58 + +## Improvements + +- Refactor popularity SQL + ([#2964](https://github.com/WordPress/openverse/pull/2964)) by @stacimc +- `mature` -> `sensitive` frontend copy and code + ([#3006](https://github.com/WordPress/openverse/pull/3006)) by @sarayourfriend + +## Internal Improvements + +- Update dependency Pillow to v10.0.1 + ([#3030](https://github.com/WordPress/openverse/pull/3030)) by @stacimc +- Remove `requests-oauthlib` from api dependencies + ([#3018](https://github.com/WordPress/openverse/pull/3018)) by @rwidom +- Remove django-sslserver + ([#3002](https://github.com/WordPress/openverse/pull/3002)) by @krysal + +## Bug Fixes + +- Populate sensitivities for single result view + ([#3013](https://github.com/WordPress/openverse/pull/3013)) by @dhruvkb diff --git a/documentation/changelogs/frontend/2023.09.11.17.34.23.md b/documentation/changelogs/frontend/2023.09.11.17.34.23.md new file mode 100644 index 00000000000..4de2736cc37 --- /dev/null +++ b/documentation/changelogs/frontend/2023.09.11.17.34.23.md @@ -0,0 +1,24 @@ +# 2023.09.11.17.34.23 + +## Improvements + +- Separate features into persistent and session scoped cookies + ([#2955](https://github.com/WordPress/openverse/pull/2955)) by @dhruvkb +- Add an updated `VAudioControl` component + ([#2929](https://github.com/WordPress/openverse/pull/2929)) by @obulat + +## Internal Improvements + +- Skip flaky global audio visual regression test + ([#3010](https://github.com/WordPress/openverse/pull/3010)) by @sarayourfriend +- Silo ESLint configuration into plugin package + ([#2973](https://github.com/WordPress/openverse/pull/2973)) by @sarayourfriend + +## Bug Fixes + +- Fix meta tags on pages + ([#2997](https://github.com/WordPress/openverse/pull/2997)) by @obulat +- Check flag 'sensitive_content' with 'fetch_sensitive' + ([#2995](https://github.com/WordPress/openverse/pull/2995)) by @dhruvkb +- Fix 
package build and refactor tsconfig to be less complex and breakable + ([#2972](https://github.com/WordPress/openverse/pull/2972)) by @sarayourfriend diff --git a/documentation/changelogs/frontend/2023.09.18.10.25.25.md b/documentation/changelogs/frontend/2023.09.18.10.25.25.md new file mode 100644 index 00000000000..67a78660afb --- /dev/null +++ b/documentation/changelogs/frontend/2023.09.18.10.25.25.md @@ -0,0 +1,22 @@ +# 2023.09.18.10.25.25 + +## Improvements + +- Update links in metadata section on single result page + ([#2851](https://github.com/WordPress/openverse/pull/2851)) by @obulat +- `mature` -> `sensitive` frontend copy and code + ([#3006](https://github.com/WordPress/openverse/pull/3006)) by @sarayourfriend + +## Internal Improvements + +- Add content safety info to `SELECT_SEARCH_RESULT` event + ([#2983](https://github.com/WordPress/openverse/pull/2983)) by @dhruvkb + +## Bug Fixes + +- Resolve flakiness in global audio player VR tests + ([#3021](https://github.com/WordPress/openverse/pull/3021)) by @dhruvkb +- Filter languages by presence of ISO + ([#3017](https://github.com/WordPress/openverse/pull/3017)) by @dhruvkb +- Media results null check + ([#3016](https://github.com/WordPress/openverse/pull/3016)) by @akhilsrivatsa diff --git a/documentation/meta/monitoring/runbooks/api_http_2xx_under_threshold.md b/documentation/meta/monitoring/runbooks/api_http_2xx_under_threshold.md new file mode 100644 index 00000000000..bcaf33a700a --- /dev/null +++ b/documentation/meta/monitoring/runbooks/api_http_2xx_under_threshold.md @@ -0,0 +1,35 @@ +# Run Book: API Production HTTP 2XX responses count under threshold + +```{admonition} Metadata +Status: **Unstable** + +Maintainer: @krysaldb + +Alarm link: +- +``` + +## Severity Guide + +After confirming there is not a total outage, check if the overall request count +has decreased as well (go to the [CloudWatch dashboard][cloudwatch] or +alternatively check in CloudFlare). 
If the overall requests are lower then the +severity is low, and you should continue searching for the cause of the general +decrease. + +If the lower number is only in 2XX responses the severity is likely high, so +also check the dashboard to look for other anomalies. Go to the [API +logs][api_logs] to check for errors or data that yield clues. + +[cloudwatch]: + https://us-east-1.console.aws.amazon.com/cloudwatch/home?region=us-east-1#dashboards/dashboard/ECS-Production-Dashboard +[api_logs]: + https://us-east-1.console.aws.amazon.com/cloudwatch/home?region=us-east-1#logsV2:log-groups/log-group/$252Fecs$252Fproduction$252Fapi + +## Historical false positives + +Nothing registered to date. + +## Related incident reports + +Nothing registered to date. diff --git a/documentation/meta/monitoring/runbooks/api_http_5xx_above_threshold.md b/documentation/meta/monitoring/runbooks/api_http_5xx_above_threshold.md new file mode 100644 index 00000000000..6c90b62e73c --- /dev/null +++ b/documentation/meta/monitoring/runbooks/api_http_5xx_above_threshold.md @@ -0,0 +1,31 @@ +# Run Book: API Production HTTP 5XX responses count above threshold + +```{admonition} Metadata +Status: **Unstable** + +Maintainer: @krysaldb + +Alarm link: +- +``` + +## Severity Guide + +After confirming there is not a total outage, check if the increase of 5XX HTTP +errors is related to a regular time where resources are expected to be +constrained like a recent deployment, a data refresh, DB maintenance, etc. If +the spike is related to one of these events and the alarm stabilizes in a +short time, then the severity is low. + +If the issue is not related to known recurrent events and persists, the severity +is critical. Check if dependent services –DB, Redis, Elasticsearch– are +available to the API or if the problem is intrinsic to itself. + +## Historical false positives + +Nothing registered to date. 
+ +## Related incident reports + +- _2023-07-16 at 05:05 UTC: 5XX responses spike to ~50 near a database restart + during the RDS maintenance window. Unknown cause._ diff --git a/documentation/meta/monitoring/runbooks/api_request_count_above_threshold.md b/documentation/meta/monitoring/runbooks/api_request_count_above_threshold.md new file mode 100644 index 00000000000..332c7afafb6 --- /dev/null +++ b/documentation/meta/monitoring/runbooks/api_request_count_above_threshold.md @@ -0,0 +1,36 @@ +# Run Book: API Production Request Count above threshold + +```{admonition} Metadata +Status: **Unstable** + +Maintainer: @krysaldb + +Alarm link: +- +``` + +## Severity Guide + +When a sudden increase in request count is noticed, verify that the services are +supporting the load by looking at metrics like response time or ES CPU usage for +example. If the API is doing fine, then severity is low and may only require +future resource scaling depending on the kind of traffic. + +If the services are strained then the severity is critical, search for the root +cause to prevent more serious outages. If there are no recent obvious +integrations (like the Gutenberg plugin) then follow the run book to [identify +traffic anomalies in Cloudflare][runbook_traffic], to determine whether the +recent traffic is organic or if it comes from a botnet. Find the origin of +requests and evaluate whether it needs to be blocked or if Openverse services +need to adapt to the new demand. + +[runbook_traffic]: + https://docs.openverse.org/meta/monitoring/traffic/runbooks/identifying-and-blocking-traffic-anomalies.html + +## Historical false positives + +Nothing registered to date. + +## Related incident reports + +Nothing registered to date. 
diff --git a/documentation/meta/monitoring/runbooks/index.md b/documentation/meta/monitoring/runbooks/index.md index 8119322fe26..2c60fa9706f 100644 --- a/documentation/meta/monitoring/runbooks/index.md +++ b/documentation/meta/monitoring/runbooks/index.md @@ -12,7 +12,13 @@ that can be a good resource when writing a new one. ```{toctree} :titlesonly: +api_request_count_above_threshold +api_http_2xx_under_threshold +api_http_5xx_above_threshold unhealthy_ecs_hosts +nuxt_2xx_under_threshold +nuxt_5xx_above_threshold +nuxt_request_count nuxt_avg_response_time_above_threshold nuxt_p99_response_time_above_threshold ``` diff --git a/documentation/meta/monitoring/runbooks/nuxt_2xx_under_threshold.md b/documentation/meta/monitoring/runbooks/nuxt_2xx_under_threshold.md new file mode 100644 index 00000000000..1f7d1d97ea3 --- /dev/null +++ b/documentation/meta/monitoring/runbooks/nuxt_2xx_under_threshold.md @@ -0,0 +1,33 @@ +# Run Book: Nuxt 2XX request count under threshold + +```{admonition} Metadata +Status: **Unstable** + +Maintainer: @dhruvkb + +Alarm link: +- [production-nuxt](https://us-east-1.console.aws.amazon.com/cloudwatch/home?region=us-east-1#alarmsV2:alarm/Nuxt+Production+HTTP+2XX+responses+count+under+threshold) +``` + +## Severity guide + +Confirm there is not an outage. + +Check if the overall request count has decreased as well (this can be confirmed +via the +[CloudWatch dashboard](https://us-east-1.console.aws.amazon.com/cloudwatch/home?region=us-east-1#dashboards/dashboard/ECS-Production-Dashboard) +or in Cloudflare). + +- If the overall requests have decreased, the severity is low. But you should + continue to investigate why the usage has decreased below the usual amount. +- If the overall requests have not decreased, a large number of those requests + must be returning non-2XX responses, which is high severity. Further + investigation is warranted to determine the cause for the non-2XX responses. 
+ +## Historical false positives + +Nothing registered to date. + +## Related incident reports + +Nothing registered to date. diff --git a/documentation/meta/monitoring/runbooks/nuxt_5xx_above_threshold.md b/documentation/meta/monitoring/runbooks/nuxt_5xx_above_threshold.md new file mode 100644 index 00000000000..7d3705ac96f --- /dev/null +++ b/documentation/meta/monitoring/runbooks/nuxt_5xx_above_threshold.md @@ -0,0 +1,41 @@ +# Run Book: Nuxt 5XX request count above threshold + +```{admonition} Metadata +Status: **Unstable** + +Maintainer: @dhruvkb + +Alarm link: +- [production-nuxt](https://us-east-1.console.aws.amazon.com/cloudwatch/home?region=us-east-1#alarmsV2:alarm/Nuxt+Production+HTTP+5XX+responses+count+over+threshold) +``` + +## Severity guide + +Confirm there is not an outage. + +Check if the connection to the API from Nuxt has been broken, which can result +in Nuxt returning 5XX errors. + +If the connection is present and working, try to determine the source of the 5XX +errors (this can be checked by observing paths in the Cloudflare logs). + +- If the API requests are returning 2XX responses, the severity is low. But you + should continue to investigate the source of 5XX errors, which could be an + external service like Plausible. +- If the API requests are returning 5XX responses, the severity is high. Further + investigation into the API side is warranted to determine the cause for the + 5XX responses. Also refer to the + [API 5XX runbook](/meta/monitoring/runbooks/api_http_5xx_above_threshold.md). + + + +## Historical false positives + +Nothing registered to date. + +## Related incident reports + +- _2023-08-28, 12:06 to 12:24 UTC_: + + 5XX responses spiked to ~591 due to Plausible degradation. This was not + detrimental to UX. 
diff --git a/documentation/meta/monitoring/runbooks/nuxt_request_count.md b/documentation/meta/monitoring/runbooks/nuxt_request_count.md new file mode 100644 index 00000000000..0573055fa12 --- /dev/null +++ b/documentation/meta/monitoring/runbooks/nuxt_request_count.md @@ -0,0 +1,38 @@ +# Run Book: Nuxt request count above threshold + +```{admonition} Metadata +Status: **Unstable** + +Maintainer: @dhruvkb + +Alarm link: +- [production-nuxt](https://us-east-1.console.aws.amazon.com/cloudwatch/home?region=us-east-1#alarmsV2:alarm/Nuxt+Production+request+count+above+threshold) +``` + +## Severity guide + +[Identify traffic anomalies](/meta/monitoring/traffic/runbooks/identifying-and-blocking-traffic-anomalies.md) +in Cloudflare to determine if the increase is organic or due to a botnet. + +- If the increase is organic, we must update our baseline expectation of our + services' usages. The alarm thresholds should be updated if our services see + higher usage frequently and consistently. +- If the increase is a botnet attack, we need to block these agents to restore + usage to the usual level. + +We also need to verify that the requests are being handled properly and that our +services are capable of meeting this demand (this can be observed from the CPU +and memory metrics in the ECS dashboards in CloudWatch). + +- If our infra can handle the load, there is not much to do except continue to + monitor that the resources stay within reasonable limits. +- If our infra cannot handle the load, we must scale our services by increasing + capacity or adding more instances. + +## Historical false positives + +Nothing registered to date. + +## Related incident reports + +Nothing registered to date. 
diff --git a/frontend/feat/feature-flags.json b/frontend/feat/feature-flags.json index 0629084451e..61cd6f1b8ec 100644 --- a/frontend/feat/feature-flags.json +++ b/frontend/feat/feature-flags.json @@ -5,7 +5,7 @@ "staging": "switchable", "production": "disabled" }, - "description": "Mark 50% of results as mature to test content safety.", + "description": "Mark 50% of results as sensitive to test content safety.", "defaultState": "off", "storage": "cookie" }, diff --git a/frontend/nuxt.config.ts b/frontend/nuxt.config.ts index 410727bc353..a48a008de9e 100644 --- a/frontend/nuxt.config.ts +++ b/frontend/nuxt.config.ts @@ -106,7 +106,7 @@ const openverseLocales = [ nativeName: "English", }, ...(locales ?? []), -].filter((l) => Boolean(l.code)) as LocaleObject[] +].filter((l) => Boolean(l.iso)) as LocaleObject[] const port = process.env.PORT || 8443 const isProdNotPlaywright = isProd && !(process.env.PW === "true") diff --git a/frontend/src/components/VImageCell/VImageCell.vue b/frontend/src/components/VImageCell/VImageCell.vue index a7969cf8101..fab2eea00c9 100644 --- a/frontend/src/components/VImageCell/VImageCell.vue +++ b/frontend/src/components/VImageCell/VImageCell.vue @@ -172,6 +172,8 @@ export default defineComponent({ provider: props.image.provider, query: props.searchTerm || "", relatedTo: props.relatedTo, + sensitivities: props.image.sensitivity?.join(",") ?? "", + isBlurred: shouldBlur.value, }) } diff --git a/frontend/src/components/VMediaInfo/VMediaDetails.vue b/frontend/src/components/VMediaInfo/VMediaDetails.vue index 23b5e63d2f5..c4a319e3f5f 100644 --- a/frontend/src/components/VMediaInfo/VMediaDetails.vue +++ b/frontend/src/components/VMediaInfo/VMediaDetails.vue @@ -9,15 +9,10 @@
-
+

{{ media.description }}

- +
diff --git a/frontend/src/components/VMediaInfo/VMetadata.vue b/frontend/src/components/VMediaInfo/VMetadata.vue index 29b4ca7f62f..f072b0fccc2 100644 --- a/frontend/src/components/VMediaInfo/VMetadata.vue +++ b/frontend/src/components/VMediaInfo/VMetadata.vue @@ -1,70 +1,77 @@ + setup(props) { + const route = useRoute() + const uiStore = useUiStore() - diff --git a/frontend/src/components/VMediaInfo/VMetadataValue.vue b/frontend/src/components/VMediaInfo/VMetadataValue.vue new file mode 100644 index 00000000000..84f39aeadb5 --- /dev/null +++ b/frontend/src/components/VMediaInfo/VMetadataValue.vue @@ -0,0 +1,39 @@ + + diff --git a/frontend/src/components/VSearchResultsGrid/VAudioResult.vue b/frontend/src/components/VSearchResultsGrid/VAudioResult.vue index 47ae2bef1c1..367928afaed 100644 --- a/frontend/src/components/VSearchResultsGrid/VAudioResult.vue +++ b/frontend/src/components/VSearchResultsGrid/VAudioResult.vue @@ -14,10 +14,11 @@ - diff --git a/frontend/src/pages/image/_id/index.vue b/frontend/src/pages/image/_id/index.vue index be83ae0ba69..39a1a3bb98d 100644 --- a/frontend/src/pages/image/_id/index.vue +++ b/frontend/src/pages/image/_id/index.vue @@ -90,7 +90,7 @@ diff --git a/frontend/src/plugins/ua-parse.ts b/frontend/src/plugins/ua-parse.ts index 44b71ef7769..00275d54f7a 100644 --- a/frontend/src/plugins/ua-parse.ts +++ b/frontend/src/plugins/ua-parse.ts @@ -1,4 +1,4 @@ -import useragent, { Details as UADetails } from "express-useragent" +import { parse, Details as UADetails } from "express-useragent" import type { Plugin } from "@nuxt/types" @@ -12,7 +12,7 @@ const uaParsePlugin: Plugin = (context, inject) => { } let ua: UADetails | null if (typeof userAgent == "string") { - ua = useragent.parse(userAgent) + ua = parse(userAgent) } else { ua = null } diff --git a/frontend/src/stores/provider.ts b/frontend/src/stores/provider.ts index 4ae5b30bb99..4e2c6ac6021 100644 --- a/frontend/src/stores/provider.ts +++ b/frontend/src/stores/provider.ts @@ -89,6 
+89,12 @@ export const useProviderStore = defineStore("provider", { return this.providers }, + _getProvider(providerCode: string, mediaType: SupportedMediaType) { + return this.providers[mediaType].find( + (p) => p.source_name === providerCode + ) + }, + /** * Returns the display name for provider if available, or capitalizes the given providerCode. * @@ -96,12 +102,18 @@ export const useProviderStore = defineStore("provider", { * @param mediaType - mediaType of the provider */ getProviderName(providerCode: string, mediaType: SupportedMediaType) { - const provider = this.providers[mediaType].find( - (p) => p.source_name === providerCode - ) + const provider = this._getProvider(providerCode, mediaType) return provider?.display_name || capital(providerCode) }, + /** + * Returns the source URL given the source code and media type. + */ + getSourceUrl(providerCode: string, mediaType: SupportedMediaType) { + const provider = this._getProvider(providerCode, mediaType) + return provider?.source_url + }, + /** * Fetches provider data if no data is available, or if the data is too old. * On successful fetch updates lastUpdated value. diff --git a/frontend/src/types/analytics.ts b/frontend/src/types/analytics.ts index eb5587ea1a4..70bac76bdad 100644 --- a/frontend/src/types/analytics.ts +++ b/frontend/src/types/analytics.ts @@ -260,6 +260,10 @@ export type Events = { provider: string /** The search term */ query: string + /** the reasons for why this result is considered sensitive */ + sensitivities: string + /** whether the result was blurred or visible when selected by the user */ + isBlurred: boolean | null } /** * Description: When a user opens the external sources popover. 
diff --git a/frontend/src/types/media.ts b/frontend/src/types/media.ts index bfdd66e1fdf..416ecd0c4b7 100644 --- a/frontend/src/types/media.ts +++ b/frontend/src/types/media.ts @@ -50,7 +50,6 @@ export interface Media { tags: Tag[] fields_matched?: string[] - mature: boolean sensitivity: Sensitivity[] isSensitive: boolean } diff --git a/frontend/src/utils/content-safety.ts b/frontend/src/utils/content-safety.ts index 03d0bd5c7a0..967d6201975 100644 --- a/frontend/src/utils/content-safety.ts +++ b/frontend/src/utils/content-safety.ts @@ -18,7 +18,7 @@ import { * * @param id - the ID of the item for which to calculate the flags * @param frac - the fraction of items to probabilistically flag - * @returns an array of strings representing the mature flags + * @returns an array of strings representing the sensitivity flags */ export const getFakeSensitivities = (id: string, frac = 0.5): Sensitivity[] => { const random = prng(hash(id))() @@ -39,6 +39,6 @@ export const getFakeSensitivities = (id: string, frac = 0.5): Sensitivity[] => { sensitivity.push(TEXT_FILTERED) } - log("Fake mature", id, sensitivity) + log("Fake sensitive", id, sensitivity) return sensitivity } diff --git a/frontend/src/utils/decode-media-data.ts b/frontend/src/utils/decode-media-data.ts index f4a69b4b521..55482d5fd22 100644 --- a/frontend/src/utils/decode-media-data.ts +++ b/frontend/src/utils/decode-media-data.ts @@ -101,13 +101,14 @@ export const decodeMediaData = ( media: ApiMedia, mediaType: T["frontendMediaType"] ): T => { - // Fake ~50% of results as mature. + // Fake ~50% of results as sensitive. const featureFlagStore = useFeatureFlagStore() const sensitivity = featureFlagStore.isOn("fake_sensitive") && featureFlagStore.isOn("fetch_sensitive") ? getFakeSensitivities(media.id) : media[SENSITIVITY_RESPONSE_PARAM] ?? 
[] + sensitivity.sort() const isSensitive = sensitivity.length > 0 return { diff --git a/frontend/src/utils/metadata.ts b/frontend/src/utils/metadata.ts index 11920530dcf..a67ff902ec4 100644 --- a/frontend/src/utils/metadata.ts +++ b/frontend/src/utils/metadata.ts @@ -1,5 +1,9 @@ +import { title } from "case" + import type { AudioDetail, ImageDetail, Metadata } from "~/types/media" -import { IMAGE } from "~/constants/media" +import { AUDIO, IMAGE } from "~/constants/media" + +import { useProviderStore } from "~/stores/provider" import type { NuxtI18nInstance } from "@nuxtjs/i18n" @@ -35,23 +39,43 @@ export const getMediaMetadata = ( imageInfo?: { width?: number; height?: number; type?: string } ) => { const metadata: Metadata[] = [] - if (media.frontendMediaType === IMAGE) { - const mediaTypeString = getImageType(imageInfo?.type, i18n) + if (media.source && media.providerName !== media.sourceName) { metadata.push({ - label: "mediaDetails.information.type", - value: mediaTypeString.toString().toUpperCase(), + label: "mediaDetails.providerLabel", + value: media.providerName || media.provider, }) - if (media.providerName !== media.sourceName) { - metadata.push({ - label: "mediaDetails.providerLabel", - value: media.providerName || media.provider, - }) - } + } + const sourceUrl = useProviderStore().getSourceUrl( + media.source ?? media.provider, + media.frontendMediaType + ) + const sourceName = media.sourceName ?? media.providerName ?? media.provider + metadata.push({ + label: "mediaDetails.sourceLabel", + source: media.source ?? media.provider, + url: sourceUrl, + value: sourceName, + }) + + if (media.category) { metadata.push({ - label: "mediaDetails.sourceLabel", - value: media.sourceName ?? media.providerName ?? 
media.provider, - url: media.foreign_landing_url, + label: "mediaDetails.information.category", + value: i18n + .t(`filters.${media.frontendMediaType}Categories.${media.category}`) + .toString(), }) + } + + const mediaTypeString = + media.frontendMediaType === IMAGE + ? getImageType(imageInfo?.type, i18n) + : getAudioType(media, i18n) + metadata.push({ + label: "mediaDetails.information.type", + value: mediaTypeString.toString().toUpperCase(), + }) + + if (media.frontendMediaType === IMAGE) { metadata.push({ label: "imageDetails.information.dimensions", value: `${i18n.t("imageDetails.information.sizeInPixels", { @@ -59,51 +83,29 @@ export const getMediaMetadata = ( height: imageInfo?.height, })}`, }) - } else { - const mediaTypeString = getAudioType(media, i18n) + } + if (media.frontendMediaType === AUDIO) { if (media.audio_set) { - metadata.push({ + metadata.unshift({ label: "audioDetails.table.album", value: media.audio_set.title, url: media.audio_set.foreign_landing_url, }) } - if (media.category) { - const categoryKey = `filters.audioCategories.${media.category}` + if (media.genres && media.genres.length > 0) { metadata.push({ - label: "mediaDetails.information.type", - value: `${i18n.t(categoryKey)}`, + label: "audioDetails.table.genre", + value: media.genres.map((genre) => title(genre)).join(", "), }) } + if (media.sample_rate) { metadata.push({ label: "audioDetails.table.sampleRate", - value: `${media.sample_rate}`, - }) - } - if (media.filetype) { - metadata.push({ - label: "audioDetails.table.filetype", - value: mediaTypeString.toString().toUpperCase(), - }) - } - metadata.push({ - label: "mediaDetails.providerLabel", - value: media.providerName || media.provider, - url: media.foreign_landing_url, - }) - if (media.source && media.providerName !== media.sourceName) { - metadata.push({ - label: "mediaDetails.sourceLabel", - value: media.sourceName ?? media.providerName ?? 
media.provider, - }) - } - if (media.genres && media.genres.length > 0) { - metadata.push({ - label: "audioDetails.table.genre", - value: media.genres.join(", "), + value: media.sample_rate.toString(), }) } } + return metadata } diff --git a/frontend/test/locales/ar.json b/frontend/test/locales/ar.json index a7037ed67d6..ea73c9f63fc 100644 --- a/frontend/test/locales/ar.json +++ b/frontend/test/locales/ar.json @@ -213,8 +213,8 @@ "subLabel": "مطلوب", "placeholder": "الرجاء إدخال 20 حرفًا على الأقل." }, - "mature": { - "option": "هل يحتوي على محتويات للبالغين", + "sensitive": { + "option": "يحتوي على محتوى حساس", "subLabel": "اختياري", "placeholder": "اختياريًا ، قدم وصفًا." }, diff --git a/frontend/test/locales/ru.json b/frontend/test/locales/ru.json index 86bed2bc98b..c77de966053 100644 --- a/frontend/test/locales/ru.json +++ b/frontend/test/locales/ru.json @@ -30,9 +30,10 @@ "contentReport": { "short": "Пожаловаться", "form": { - "mature": { + "sensitive": { "subLabel": "Необязательно", - "option": "Содержимое для взрослой аудитории" + "option": "Содержит конфиденциальный контент", + "placeholder": "По желанию укажите описание" }, "other": { "placeholder": "Введите не менее 20 символов.", diff --git a/frontend/test/playwright/e2e/all-results-analytics.spec.ts b/frontend/test/playwright/e2e/all-results-analytics.spec.ts index ef81c22ebd9..c39393f4125 100644 --- a/frontend/test/playwright/e2e/all-results-analytics.spec.ts +++ b/frontend/test/playwright/e2e/all-results-analytics.spec.ts @@ -28,6 +28,8 @@ test.describe("all results grid analytics test", () => { relatedTo: null, id: "2e38ac1e-830c-4e9c-b13d-2c9a1ad53f95", provider: "jamendo", + sensitivities: "", + isBlurred: false, }) }) @@ -47,6 +49,8 @@ test.describe("all results grid analytics test", () => { query: "birds", provider: "flickr", relatedTo: null, + sensitivities: "", + isBlurred: false, }) }) diff --git a/frontend/test/playwright/e2e/report-media.spec.ts 
b/frontend/test/playwright/e2e/report-media.spec.ts index bab3430087d..50e04750fab 100644 --- a/frontend/test/playwright/e2e/report-media.spec.ts +++ b/frontend/test/playwright/e2e/report-media.spec.ts @@ -58,13 +58,13 @@ const submitDmcaReport = async (page: Page, context: BrowserContext) => { return expect(newPage.url()).toContain("https://docs.google.com/forms") } -// todo: Test a mature report with the optional description field -const submitMatureContentReport = async ( +// todo: Test a sensitive report with the optional description field +const submitSensitiveContentReport = async ( page: Page, context: BrowserContext ) => { await mockReportingEndpoint(context) - await page.click('text="Contains mature content"') + await page.click('text="Contains sensitive content"') const response = await submitApiReport(page) return expect(response.status()).toBe(200) } @@ -86,7 +86,7 @@ test.beforeEach(async ({ context }) => { const reports = { dmca: submitDmcaReport, - mature: submitMatureContentReport, + sensitive: submitSensitiveContentReport, other: submitOtherReport, } diff --git a/frontend/test/playwright/e2e/search-query-client.spec.ts b/frontend/test/playwright/e2e/search-query-client.spec.ts index 7f04ab7b2f1..23347beebaf 100644 --- a/frontend/test/playwright/e2e/search-query-client.spec.ts +++ b/frontend/test/playwright/e2e/search-query-client.spec.ts @@ -111,7 +111,7 @@ test.describe("search query on CSR", () => { .getByLabel(/Turn on sensitive content fetching and blurring/i) .check() await page - .getByLabel(/Mark 50% of results as mature to test content safety./i) + .getByLabel(/Mark 50% of results as sensitive to test content safety./i) .check() await goToSearchTerm(page, "cat", { mode: "CSR" }) diff --git a/frontend/test/playwright/e2e/search-query-server.spec.ts b/frontend/test/playwright/e2e/search-query-server.spec.ts index 0c0b659ceec..81f3ccf24e1 100644 --- a/frontend/test/playwright/e2e/search-query-server.spec.ts +++ 
b/frontend/test/playwright/e2e/search-query-server.spec.ts @@ -19,7 +19,7 @@ import { ALL_MEDIA, AUDIO, IMAGE } from "~/constants/media" * 3. query parameters are used to set the filter data: * 3a. One of each values for `all` content * 3b. Several query values - several filter checkboxes - * 3c. Mature filter + * 3c. Sensitive results filter * 3d. Query parameters that are not used for current media type are discarded * All of these tests test server-generated search page, not the one generated on the client */ diff --git a/frontend/test/playwright/visual-regression/components/content-report-form.spec.ts-snapshots/content-report-focused-md-linux.png b/frontend/test/playwright/visual-regression/components/content-report-form.spec.ts-snapshots/content-report-focused-md-linux.png index 7721b1d0215..d978f62de81 100644 Binary files a/frontend/test/playwright/visual-regression/components/content-report-form.spec.ts-snapshots/content-report-focused-md-linux.png and b/frontend/test/playwright/visual-regression/components/content-report-form.spec.ts-snapshots/content-report-focused-md-linux.png differ diff --git a/frontend/test/playwright/visual-regression/components/content-report-form.spec.ts-snapshots/content-report-unfocused-md-linux.png b/frontend/test/playwright/visual-regression/components/content-report-form.spec.ts-snapshots/content-report-unfocused-md-linux.png index 7721b1d0215..d978f62de81 100644 Binary files a/frontend/test/playwright/visual-regression/components/content-report-form.spec.ts-snapshots/content-report-unfocused-md-linux.png and b/frontend/test/playwright/visual-regression/components/content-report-form.spec.ts-snapshots/content-report-unfocused-md-linux.png differ diff --git a/frontend/test/playwright/visual-regression/components/global-audio-player.spec.ts b/frontend/test/playwright/visual-regression/components/global-audio-player.spec.ts index 9cd5d8c4d90..211c86a852f 100644 --- 
a/frontend/test/playwright/visual-regression/components/global-audio-player.spec.ts +++ b/frontend/test/playwright/visual-regression/components/global-audio-player.spec.ts @@ -28,6 +28,9 @@ for (const dir of languageDirections) { .click() // To make the tests consistent, set the played area to the same position await page.mouse.click(170, 650) + // Allow audio to buffer to the seeked position + // eslint-disable-next-line playwright/no-networkidle + await page.waitForLoadState("networkidle") await expectSnapshot(`global-audio-player-on-search-${dir}.png`, page) }) }) diff --git a/frontend/test/playwright/visual-regression/pages/pages-single-result.spec.ts b/frontend/test/playwright/visual-regression/pages/pages-single-result.spec.ts index f86673bde40..d58b25e1e6f 100644 --- a/frontend/test/playwright/visual-regression/pages/pages-single-result.spec.ts +++ b/frontend/test/playwright/visual-regression/pages/pages-single-result.spec.ts @@ -40,9 +40,7 @@ for (const mediaType of supportedMediaTypes) { { fullPage: true, }, - { - maxDiffPixelRatio: 0.01, - } + { maxDiffPixelRatio: 0.01 } ) }) }) diff --git a/frontend/test/playwright/visual-regression/pages/pages-single-result.spec.ts-snapshots/audio-ltr-from-search-results-2xl-linux.png b/frontend/test/playwright/visual-regression/pages/pages-single-result.spec.ts-snapshots/audio-ltr-from-search-results-2xl-linux.png index 89116251743..be0d50840bb 100644 Binary files a/frontend/test/playwright/visual-regression/pages/pages-single-result.spec.ts-snapshots/audio-ltr-from-search-results-2xl-linux.png and b/frontend/test/playwright/visual-regression/pages/pages-single-result.spec.ts-snapshots/audio-ltr-from-search-results-2xl-linux.png differ diff --git a/frontend/test/playwright/visual-regression/pages/pages-single-result.spec.ts-snapshots/audio-ltr-from-search-results-lg-linux.png b/frontend/test/playwright/visual-regression/pages/pages-single-result.spec.ts-snapshots/audio-ltr-from-search-results-lg-linux.png index 
d7fe24880a9..3b2673bcad1 100644 Binary files a/frontend/test/playwright/visual-regression/pages/pages-single-result.spec.ts-snapshots/audio-ltr-from-search-results-lg-linux.png and b/frontend/test/playwright/visual-regression/pages/pages-single-result.spec.ts-snapshots/audio-ltr-from-search-results-lg-linux.png differ diff --git a/frontend/test/playwright/visual-regression/pages/pages-single-result.spec.ts-snapshots/audio-ltr-from-search-results-md-linux.png b/frontend/test/playwright/visual-regression/pages/pages-single-result.spec.ts-snapshots/audio-ltr-from-search-results-md-linux.png index d51c42898f2..2af9ea2696c 100644 Binary files a/frontend/test/playwright/visual-regression/pages/pages-single-result.spec.ts-snapshots/audio-ltr-from-search-results-md-linux.png and b/frontend/test/playwright/visual-regression/pages/pages-single-result.spec.ts-snapshots/audio-ltr-from-search-results-md-linux.png differ diff --git a/frontend/test/playwright/visual-regression/pages/pages-single-result.spec.ts-snapshots/audio-ltr-from-search-results-sm-linux.png b/frontend/test/playwright/visual-regression/pages/pages-single-result.spec.ts-snapshots/audio-ltr-from-search-results-sm-linux.png index cdb98484511..3501736e846 100644 Binary files a/frontend/test/playwright/visual-regression/pages/pages-single-result.spec.ts-snapshots/audio-ltr-from-search-results-sm-linux.png and b/frontend/test/playwright/visual-regression/pages/pages-single-result.spec.ts-snapshots/audio-ltr-from-search-results-sm-linux.png differ diff --git a/frontend/test/playwright/visual-regression/pages/pages-single-result.spec.ts-snapshots/audio-ltr-from-search-results-xl-linux.png b/frontend/test/playwright/visual-regression/pages/pages-single-result.spec.ts-snapshots/audio-ltr-from-search-results-xl-linux.png index f281152f340..9334e0b26b2 100644 Binary files a/frontend/test/playwright/visual-regression/pages/pages-single-result.spec.ts-snapshots/audio-ltr-from-search-results-xl-linux.png and 
b/frontend/test/playwright/visual-regression/pages/pages-single-result.spec.ts-snapshots/audio-ltr-from-search-results-xl-linux.png differ diff --git a/frontend/test/playwright/visual-regression/pages/pages-single-result.spec.ts-snapshots/audio-ltr-from-search-results-xs-linux.png b/frontend/test/playwright/visual-regression/pages/pages-single-result.spec.ts-snapshots/audio-ltr-from-search-results-xs-linux.png index 6f616dec7f1..dd41c8fdce1 100644 Binary files a/frontend/test/playwright/visual-regression/pages/pages-single-result.spec.ts-snapshots/audio-ltr-from-search-results-xs-linux.png and b/frontend/test/playwright/visual-regression/pages/pages-single-result.spec.ts-snapshots/audio-ltr-from-search-results-xs-linux.png differ diff --git a/frontend/test/playwright/visual-regression/pages/pages-single-result.spec.ts-snapshots/audio-rtl-from-search-results-2xl-linux.png b/frontend/test/playwright/visual-regression/pages/pages-single-result.spec.ts-snapshots/audio-rtl-from-search-results-2xl-linux.png index 705d4a4c59e..9243bee47cf 100644 Binary files a/frontend/test/playwright/visual-regression/pages/pages-single-result.spec.ts-snapshots/audio-rtl-from-search-results-2xl-linux.png and b/frontend/test/playwright/visual-regression/pages/pages-single-result.spec.ts-snapshots/audio-rtl-from-search-results-2xl-linux.png differ diff --git a/frontend/test/playwright/visual-regression/pages/pages-single-result.spec.ts-snapshots/audio-rtl-from-search-results-lg-linux.png b/frontend/test/playwright/visual-regression/pages/pages-single-result.spec.ts-snapshots/audio-rtl-from-search-results-lg-linux.png index 35ba86f0d85..b894db721e8 100644 Binary files a/frontend/test/playwright/visual-regression/pages/pages-single-result.spec.ts-snapshots/audio-rtl-from-search-results-lg-linux.png and b/frontend/test/playwright/visual-regression/pages/pages-single-result.spec.ts-snapshots/audio-rtl-from-search-results-lg-linux.png differ diff --git 
a/frontend/test/playwright/visual-regression/pages/pages-single-result.spec.ts-snapshots/audio-rtl-from-search-results-md-linux.png b/frontend/test/playwright/visual-regression/pages/pages-single-result.spec.ts-snapshots/audio-rtl-from-search-results-md-linux.png index 60d2b80200a..9877e1900db 100644 Binary files a/frontend/test/playwright/visual-regression/pages/pages-single-result.spec.ts-snapshots/audio-rtl-from-search-results-md-linux.png and b/frontend/test/playwright/visual-regression/pages/pages-single-result.spec.ts-snapshots/audio-rtl-from-search-results-md-linux.png differ diff --git a/frontend/test/playwright/visual-regression/pages/pages-single-result.spec.ts-snapshots/audio-rtl-from-search-results-sm-linux.png b/frontend/test/playwright/visual-regression/pages/pages-single-result.spec.ts-snapshots/audio-rtl-from-search-results-sm-linux.png index 1f168578fb3..e27eaa56f4d 100644 Binary files a/frontend/test/playwright/visual-regression/pages/pages-single-result.spec.ts-snapshots/audio-rtl-from-search-results-sm-linux.png and b/frontend/test/playwright/visual-regression/pages/pages-single-result.spec.ts-snapshots/audio-rtl-from-search-results-sm-linux.png differ diff --git a/frontend/test/playwright/visual-regression/pages/pages-single-result.spec.ts-snapshots/audio-rtl-from-search-results-xl-linux.png b/frontend/test/playwright/visual-regression/pages/pages-single-result.spec.ts-snapshots/audio-rtl-from-search-results-xl-linux.png index 95e275235c6..267596a7c7f 100644 Binary files a/frontend/test/playwright/visual-regression/pages/pages-single-result.spec.ts-snapshots/audio-rtl-from-search-results-xl-linux.png and b/frontend/test/playwright/visual-regression/pages/pages-single-result.spec.ts-snapshots/audio-rtl-from-search-results-xl-linux.png differ diff --git a/frontend/test/playwright/visual-regression/pages/pages-single-result.spec.ts-snapshots/audio-rtl-from-search-results-xs-linux.png 
b/frontend/test/playwright/visual-regression/pages/pages-single-result.spec.ts-snapshots/audio-rtl-from-search-results-xs-linux.png index ed3a1e5c318..3dbf0ca2e4f 100644 Binary files a/frontend/test/playwright/visual-regression/pages/pages-single-result.spec.ts-snapshots/audio-rtl-from-search-results-xs-linux.png and b/frontend/test/playwright/visual-regression/pages/pages-single-result.spec.ts-snapshots/audio-rtl-from-search-results-xs-linux.png differ diff --git a/frontend/test/playwright/visual-regression/pages/pages-single-result.spec.ts-snapshots/image-ltr-from-search-results-2xl-linux.png b/frontend/test/playwright/visual-regression/pages/pages-single-result.spec.ts-snapshots/image-ltr-from-search-results-2xl-linux.png index 6b77cf4da73..100865bc8a6 100644 Binary files a/frontend/test/playwright/visual-regression/pages/pages-single-result.spec.ts-snapshots/image-ltr-from-search-results-2xl-linux.png and b/frontend/test/playwright/visual-regression/pages/pages-single-result.spec.ts-snapshots/image-ltr-from-search-results-2xl-linux.png differ diff --git a/frontend/test/playwright/visual-regression/pages/pages-single-result.spec.ts-snapshots/image-ltr-from-search-results-lg-linux.png b/frontend/test/playwright/visual-regression/pages/pages-single-result.spec.ts-snapshots/image-ltr-from-search-results-lg-linux.png index 0af80084c2a..b9bfdc02090 100644 Binary files a/frontend/test/playwright/visual-regression/pages/pages-single-result.spec.ts-snapshots/image-ltr-from-search-results-lg-linux.png and b/frontend/test/playwright/visual-regression/pages/pages-single-result.spec.ts-snapshots/image-ltr-from-search-results-lg-linux.png differ diff --git a/frontend/test/playwright/visual-regression/pages/pages-single-result.spec.ts-snapshots/image-ltr-from-search-results-md-linux.png b/frontend/test/playwright/visual-regression/pages/pages-single-result.spec.ts-snapshots/image-ltr-from-search-results-md-linux.png index edcaae2fc6b..964a84e9564 100644 Binary files 
a/frontend/test/playwright/visual-regression/pages/pages-single-result.spec.ts-snapshots/image-ltr-from-search-results-md-linux.png and b/frontend/test/playwright/visual-regression/pages/pages-single-result.spec.ts-snapshots/image-ltr-from-search-results-md-linux.png differ diff --git a/frontend/test/playwright/visual-regression/pages/pages-single-result.spec.ts-snapshots/image-ltr-from-search-results-sm-linux.png b/frontend/test/playwright/visual-regression/pages/pages-single-result.spec.ts-snapshots/image-ltr-from-search-results-sm-linux.png index dd1d4bedcac..2901f982b98 100644 Binary files a/frontend/test/playwright/visual-regression/pages/pages-single-result.spec.ts-snapshots/image-ltr-from-search-results-sm-linux.png and b/frontend/test/playwright/visual-regression/pages/pages-single-result.spec.ts-snapshots/image-ltr-from-search-results-sm-linux.png differ diff --git a/frontend/test/playwright/visual-regression/pages/pages-single-result.spec.ts-snapshots/image-ltr-from-search-results-xl-linux.png b/frontend/test/playwright/visual-regression/pages/pages-single-result.spec.ts-snapshots/image-ltr-from-search-results-xl-linux.png index 9b763ec443b..3dc48e53848 100644 Binary files a/frontend/test/playwright/visual-regression/pages/pages-single-result.spec.ts-snapshots/image-ltr-from-search-results-xl-linux.png and b/frontend/test/playwright/visual-regression/pages/pages-single-result.spec.ts-snapshots/image-ltr-from-search-results-xl-linux.png differ diff --git a/frontend/test/playwright/visual-regression/pages/pages-single-result.spec.ts-snapshots/image-ltr-from-search-results-xs-linux.png b/frontend/test/playwright/visual-regression/pages/pages-single-result.spec.ts-snapshots/image-ltr-from-search-results-xs-linux.png index 072552a839f..52a12de7d18 100644 Binary files a/frontend/test/playwright/visual-regression/pages/pages-single-result.spec.ts-snapshots/image-ltr-from-search-results-xs-linux.png and 
b/frontend/test/playwright/visual-regression/pages/pages-single-result.spec.ts-snapshots/image-ltr-from-search-results-xs-linux.png differ diff --git a/frontend/test/playwright/visual-regression/pages/pages-single-result.spec.ts-snapshots/image-rtl-from-search-results-2xl-linux.png b/frontend/test/playwright/visual-regression/pages/pages-single-result.spec.ts-snapshots/image-rtl-from-search-results-2xl-linux.png index a3105aa78de..82076288339 100644 Binary files a/frontend/test/playwright/visual-regression/pages/pages-single-result.spec.ts-snapshots/image-rtl-from-search-results-2xl-linux.png and b/frontend/test/playwright/visual-regression/pages/pages-single-result.spec.ts-snapshots/image-rtl-from-search-results-2xl-linux.png differ diff --git a/frontend/test/playwright/visual-regression/pages/pages-single-result.spec.ts-snapshots/image-rtl-from-search-results-lg-linux.png b/frontend/test/playwright/visual-regression/pages/pages-single-result.spec.ts-snapshots/image-rtl-from-search-results-lg-linux.png index 150032f8772..00c90dc2552 100644 Binary files a/frontend/test/playwright/visual-regression/pages/pages-single-result.spec.ts-snapshots/image-rtl-from-search-results-lg-linux.png and b/frontend/test/playwright/visual-regression/pages/pages-single-result.spec.ts-snapshots/image-rtl-from-search-results-lg-linux.png differ diff --git a/frontend/test/playwright/visual-regression/pages/pages-single-result.spec.ts-snapshots/image-rtl-from-search-results-md-linux.png b/frontend/test/playwright/visual-regression/pages/pages-single-result.spec.ts-snapshots/image-rtl-from-search-results-md-linux.png index 04d727546ed..72d249ad245 100644 Binary files a/frontend/test/playwright/visual-regression/pages/pages-single-result.spec.ts-snapshots/image-rtl-from-search-results-md-linux.png and b/frontend/test/playwright/visual-regression/pages/pages-single-result.spec.ts-snapshots/image-rtl-from-search-results-md-linux.png differ diff --git 
a/frontend/test/playwright/visual-regression/pages/pages-single-result.spec.ts-snapshots/image-rtl-from-search-results-sm-linux.png b/frontend/test/playwright/visual-regression/pages/pages-single-result.spec.ts-snapshots/image-rtl-from-search-results-sm-linux.png index ee58d9a5ff1..cf981653468 100644 Binary files a/frontend/test/playwright/visual-regression/pages/pages-single-result.spec.ts-snapshots/image-rtl-from-search-results-sm-linux.png and b/frontend/test/playwright/visual-regression/pages/pages-single-result.spec.ts-snapshots/image-rtl-from-search-results-sm-linux.png differ diff --git a/frontend/test/playwright/visual-regression/pages/pages-single-result.spec.ts-snapshots/image-rtl-from-search-results-xl-linux.png b/frontend/test/playwright/visual-regression/pages/pages-single-result.spec.ts-snapshots/image-rtl-from-search-results-xl-linux.png index f132417bbc3..131b945f735 100644 Binary files a/frontend/test/playwright/visual-regression/pages/pages-single-result.spec.ts-snapshots/image-rtl-from-search-results-xl-linux.png and b/frontend/test/playwright/visual-regression/pages/pages-single-result.spec.ts-snapshots/image-rtl-from-search-results-xl-linux.png differ diff --git a/frontend/test/playwright/visual-regression/pages/pages-single-result.spec.ts-snapshots/image-rtl-from-search-results-xs-linux.png b/frontend/test/playwright/visual-regression/pages/pages-single-result.spec.ts-snapshots/image-rtl-from-search-results-xs-linux.png index de08669c876..63198f9a6f0 100644 Binary files a/frontend/test/playwright/visual-regression/pages/pages-single-result.spec.ts-snapshots/image-rtl-from-search-results-xs-linux.png and b/frontend/test/playwright/visual-regression/pages/pages-single-result.spec.ts-snapshots/image-rtl-from-search-results-xs-linux.png differ diff --git a/frontend/test/playwright/visual-regression/pages/pages-single-result.spec.ts-snapshots/ltr-full-page-report-sm-linux.png 
b/frontend/test/playwright/visual-regression/pages/pages-single-result.spec.ts-snapshots/ltr-full-page-report-sm-linux.png index d985c04c647..6353836d393 100644 Binary files a/frontend/test/playwright/visual-regression/pages/pages-single-result.spec.ts-snapshots/ltr-full-page-report-sm-linux.png and b/frontend/test/playwright/visual-regression/pages/pages-single-result.spec.ts-snapshots/ltr-full-page-report-sm-linux.png differ diff --git a/frontend/test/playwright/visual-regression/pages/pages-single-result.spec.ts-snapshots/ltr-full-page-report-xl-linux.png b/frontend/test/playwright/visual-regression/pages/pages-single-result.spec.ts-snapshots/ltr-full-page-report-xl-linux.png index ae86975602e..47b1403a481 100644 Binary files a/frontend/test/playwright/visual-regression/pages/pages-single-result.spec.ts-snapshots/ltr-full-page-report-xl-linux.png and b/frontend/test/playwright/visual-regression/pages/pages-single-result.spec.ts-snapshots/ltr-full-page-report-xl-linux.png differ diff --git a/frontend/test/playwright/visual-regression/pages/pages-single-result.spec.ts-snapshots/rtl-full-page-report-sm-linux.png b/frontend/test/playwright/visual-regression/pages/pages-single-result.spec.ts-snapshots/rtl-full-page-report-sm-linux.png index dc1a4bc856f..bbb1d32c930 100644 Binary files a/frontend/test/playwright/visual-regression/pages/pages-single-result.spec.ts-snapshots/rtl-full-page-report-sm-linux.png and b/frontend/test/playwright/visual-regression/pages/pages-single-result.spec.ts-snapshots/rtl-full-page-report-sm-linux.png differ diff --git a/frontend/test/playwright/visual-regression/pages/pages-single-result.spec.ts-snapshots/rtl-full-page-report-xl-linux.png b/frontend/test/playwright/visual-regression/pages/pages-single-result.spec.ts-snapshots/rtl-full-page-report-xl-linux.png index 5a6bf0d2f69..a1cceafee3f 100644 Binary files a/frontend/test/playwright/visual-regression/pages/pages-single-result.spec.ts-snapshots/rtl-full-page-report-xl-linux.png and 
b/frontend/test/playwright/visual-regression/pages/pages-single-result.spec.ts-snapshots/rtl-full-page-report-xl-linux.png differ diff --git a/frontend/test/unit/fixtures/audio.js b/frontend/test/unit/fixtures/audio.js index e2b148dc125..ff8db994659 100644 --- a/frontend/test/unit/fixtures/audio.js +++ b/frontend/test/unit/fixtures/audio.js @@ -37,7 +37,6 @@ export const getAudioObj = (overrides = {}) => alt_files: null, attribution: '"La vie des bêtes" by AS-POTIRONT! is licensed under CC BY-NC-SA 2.5. To view a copy of this license, visit https://creativecommons.org/licenses/by-nc-sa/2.5/.', - mature: null, thumbnail: "https://localhost:8000/v1/audio/e19345b8-6937-49f7-a0fd-03bf057efc28/thumb", waveform: @@ -48,7 +47,7 @@ export const getAudioObj = (overrides = {}) => related_url: "http://localhost:8000/v1/audio/e19345b8-6937-49f7-a0fd-03bf057efc28/recommendations", peaks: [], - isSensitive: false, + unstable_sensitivity: [], }, overrides ) diff --git a/frontend/test/unit/fixtures/image.js b/frontend/test/unit/fixtures/image.js index c70762ef91b..b9c444636c4 100644 --- a/frontend/test/unit/fixtures/image.js +++ b/frontend/test/unit/fixtures/image.js @@ -24,7 +24,6 @@ export const image = { attribution: '"Cat cafe in Seoul" by toel-uru is licensed under CC BY-NC-SA 2.0. 
To view a copy of this license, visit https://creativecommons.org/licenses/by-nc-sa/2.0/.', fields_matched: ["description", "tags.name", "title"], - mature: false, height: 681, width: 1024, thumbnail: diff --git a/frontend/test/unit/specs/components/VMediaInfo/v-media-details.spec.js b/frontend/test/unit/specs/components/VMediaInfo/v-media-details.spec.js index 520e7f3a985..8515dbd92c7 100644 --- a/frontend/test/unit/specs/components/VMediaInfo/v-media-details.spec.js +++ b/frontend/test/unit/specs/components/VMediaInfo/v-media-details.spec.js @@ -5,6 +5,16 @@ import { render } from "~~/test/unit/test-utils/render" import VMediaDetails from "~/components/VMediaInfo/VMediaDetails.vue" +jest.mock("@nuxtjs/composition-api", () => { + return { + ...jest.requireActual("@nuxtjs/composition-api"), + useRoute: jest.fn().mockReturnValue({ + value: { + name: "audio-id__en", + }, + }), + } +}) describe("VMediaDetails", () => { let options let props @@ -24,15 +34,16 @@ describe("VMediaDetails", () => { media: getAudioObj(overrides), } options = { + mocks: { route: { value: { name: "audio-id" } } }, propsData: props, - stubs: ["VAudioThumbnail", "VLink"], + stubs: ["VAudioThumbnail"], } }) it("renders the album title", () => { render(VMediaDetails, options) - const album = screen.getByText(overrides.audio_set.title) + const album = screen.getByRole("link", { name: overrides.audio_set.title }) expect(album).toHaveAttribute( "href", overrides.audio_set.foreign_landing_url diff --git a/frontend/test/unit/specs/components/v-content-report-form.spec.js b/frontend/test/unit/specs/components/v-content-report-form.spec.js index 3505817d729..77bffd7c83f 100644 --- a/frontend/test/unit/specs/components/v-content-report-form.spec.js +++ b/frontend/test/unit/specs/components/v-content-report-form.spec.js @@ -16,9 +16,9 @@ const getDmcaInput = () => screen.queryByRole("radio", { name: /Infringes copyright/i, }) -const getMatureInput = () => +const getSensitiveInput = () => 
screen.queryByRole("radio", { - name: /mature/i, + name: /sensitive/i, }) const getOtherInput = () => screen.queryByRole("radio", { @@ -75,7 +75,7 @@ describe("VContentReportForm", () => { it("should contain the correct contents", async () => { await render(VContentReportForm, options) expect(getDmcaInput()).toBeVisible() - expect(getMatureInput()).toBeVisible() + expect(getSensitiveInput()).toBeVisible() expect(getOtherInput()).toBeVisible() expect(getCancelButton()).toBeVisible() // By default, DMCA is selected, and we show a link to @@ -86,7 +86,7 @@ describe("VContentReportForm", () => { it("should render thank you note when report is sent", async () => { const { queryByText } = render(VContentReportForm, options) - await fireEvent.click(getMatureInput()) + await fireEvent.click(getSensitiveInput()) await fireEvent.click(getReportButton()) // Submission successful message @@ -99,7 +99,7 @@ describe("VContentReportForm", () => { ReportService.sendReport = () => Promise.reject() const { queryByText } = render(VContentReportForm, options) - await fireEvent.click(getMatureInput()) + await fireEvent.click(getSensitiveInput()) await fireEvent.click(getReportButton()) // Submission error message @@ -130,16 +130,16 @@ describe("VContentReportForm", () => { getDescriptionTextarea() }) - it("should dispatch SEND_CONTENT_REPORT on next when mature is selected", async () => { + it("should dispatch SEND_CONTENT_REPORT on next when sensitive is selected", async () => { ReportService.sendReport = jest.fn() render(VContentReportForm, options) - await fireEvent.click(getMatureInput()) + await fireEvent.click(getSensitiveInput()) await fireEvent.click(getReportButton()) expect(ReportService.sendReport).toHaveBeenCalledWith({ identifier: props.media.id, - reason: "mature", + reason: "sensitive", mediaType: props.media.frontendMediaType, description: "", }) diff --git a/ingestion_server/Pipfile b/ingestion_server/Pipfile index dbb42a99184..bbc31a5f1d7 100644 --- 
a/ingestion_server/Pipfile +++ b/ingestion_server/Pipfile @@ -15,7 +15,8 @@ pook = "~=1.0" aws-requests-auth = "~=0.4" boto3 = "~=1.28" bottle = "~=0.12" -elasticsearch-dsl = "~=7.4" +elasticsearch = "==8.8.2" +elasticsearch-dsl = "~=8.9" falcon = "~=3.1" filelock = "~=3.12" gunicorn = "~=21.2" diff --git a/ingestion_server/Pipfile.lock b/ingestion_server/Pipfile.lock index 5fbd2eaa0a0..ab40764b401 100644 --- a/ingestion_server/Pipfile.lock +++ b/ingestion_server/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "16663b2ce054593ba6e6c336119c2a65099ce81d4ec102c62e5c335e7b7eee7b" + "sha256": "25098bab5ca1af1b01e69943d69b5df7d8d882e80f44a5b6555a94f5752661ca" }, "pipfile-spec": 6, "requires": { @@ -34,19 +34,19 @@ }, "boto3": { "hashes": [ - "sha256:6ff9a5b815e106656596064d51c9b6ba97a307807baa5f89634384b7d3f7ecc6", - "sha256:bd7c760afb195eaeaab907dc6b2c21fa64ddbba3fed4a869e80d820ddbd6cc70" + "sha256:ec7895504e3b2dd35fbdb7397bc3c48daaba8e6f37bc436aa928ff4e745f0f1c", + "sha256:fed2d673fce33384697baa0028edfd18b06aa17af5c3ef82da75e9254a8ffb07" ], "index": "pypi", - "version": "==1.28.40" + "version": "==1.28.48" }, "botocore": { "hashes": [ - "sha256:ce22a82ef8674f49691477d09558992cc87e7331f65c6a5b0da897ab192240ca", - "sha256:df766969f0d9ef9eda1a9c9946e0e173c10199f37a9e4c92861f11ddb5c9e702" + "sha256:6ed16f66aa6ed6070fed26d69764cb14c7759e4cc0b1c191283cc48b05d65de9", + "sha256:9618c06f7e08ed590dae6613b8b2511055f7d6c07517382143ef8563169d4ef1" ], "markers": "python_version >= '3.7'", - "version": "==1.31.40" + "version": "==1.31.48" }, "bottle": { "hashes": [ @@ -142,24 +142,32 @@ "sha256:f779d3ad205f108d14e99bb3859aa7dd8e9c68874617c72354d7ecaec2a054ac", "sha256:f87f746ee241d30d6ed93969de31e5ffd09a2961a051e60ae6bddde9ec3583aa" ], - "markers": "python_version >= '3.7'", + "markers": "python_full_version >= '3.7.0'", "version": "==3.2.0" }, + "elastic-transport": { + "hashes": [ + "sha256:19db271ab79c9f70f8c43f8f5b5111408781a6176b54ab2e54d713b6d9ceb815", + 
"sha256:b9ad708ceb7fcdbc6b30a96f886609a109f042c0b9d9f2e44403b3133ba7ff10" + ], + "markers": "python_version >= '3.6'", + "version": "==8.4.0" + }, "elasticsearch": { "hashes": [ - "sha256:0e2454645dc00517dee4c6de3863411a9c5f1955d013c5fefa29123dadc92f98", - "sha256:66c4ece2adfe7cc120e2b6a6798a1fd5c777aecf82eec39bb95cef7cfc7ea2b3" + "sha256:bed8cf8fcc6c3be7c254b579de4c29afab021f373c832246f912d37aef3c6bd5", + "sha256:bffd6ce4faaacf90e6f617241773b3da8fb94e2e83554f5508e2fab92ca79643" ], - "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3' and python_version < '4'", - "version": "==7.17.9" + "index": "pypi", + "version": "==8.8.2" }, "elasticsearch-dsl": { "hashes": [ - "sha256:07ee9c87dc28cc3cae2daa19401e1e18a172174ad9e5ca67938f752e3902a1d5", - "sha256:97f79239a252be7c4cce554c29e64695d7ef6a4828372316a5e5ff815e7a7498" + "sha256:66410adf881f02b8a032e8a5b2a3ee093fdeede4b814fbf04c0f6ce0499b7472", + "sha256:ab266bcf84b0f23bd2d73d9b31e054b5d38b20279cf076c53873f46b6dabf747" ], "index": "pypi", - "version": "==7.4.1" + "version": "==8.9.0" }, "falcon": { "hashes": [ @@ -201,11 +209,11 @@ }, "filelock": { "hashes": [ - "sha256:0ecc1dd2ec4672a10c8550a8182f1bd0c0a5088470ecd5a125e45f49472fac3d", - "sha256:f067e40ccc40f2b48395a80fcbd4728262fab54e232e090a4063ab804179efeb" + "sha256:08c21d87ded6e2b9da6728c3dff51baf1dcecf973b768ef35bcbc3447edb9ad4", + "sha256:2e6f249f1f3654291606e046b09f1fd5eac39b360664c27f5aad072012f8bcbd" ], "index": "pypi", - "version": "==3.12.3" + "version": "==3.12.4" }, "gunicorn": { "hashes": [ @@ -369,106 +377,106 @@ }, "rpds-py": { "hashes": [ - "sha256:00215f6a9058fbf84f9d47536902558eb61f180a6b2a0fa35338d06ceb9a2e5a", - "sha256:0028eb0967942d0d2891eae700ae1a27b7fd18604cfcb16a1ef486a790fee99e", - "sha256:0155c33af0676fc38e1107679be882077680ad1abb6303956b97259c3177e85e", - "sha256:063411228b852fb2ed7485cf91f8e7d30893e69b0acb207ec349db04cccc8225", - "sha256:0700c2133ba203c4068aaecd6a59bda22e06a5e46255c9da23cbf68c6942215d", - 
"sha256:08e08ccf5b10badb7d0a5c84829b914c6e1e1f3a716fdb2bf294e2bd01562775", - "sha256:0d292cabd7c8335bdd3237ded442480a249dbcdb4ddfac5218799364a01a0f5c", - "sha256:15932ec5f224b0e35764dc156514533a4fca52dcfda0dfbe462a1a22b37efd59", - "sha256:18f87baa20e02e9277ad8960cd89b63c79c05caf106f4c959a9595c43f2a34a5", - "sha256:1a6420a36975e0073acaeee44ead260c1f6ea56812cfc6c31ec00c1c48197173", - "sha256:1b401e8b9aece651512e62c431181e6e83048a651698a727ea0eb0699e9f9b74", - "sha256:1d7b7b71bcb82d8713c7c2e9c5f061415598af5938666beded20d81fa23e7640", - "sha256:23750a9b8a329844ba1fe267ca456bb3184984da2880ed17ae641c5af8de3fef", - "sha256:23a059143c1393015c68936370cce11690f7294731904bdae47cc3e16d0b2474", - "sha256:26d9fd624649a10e4610fab2bc820e215a184d193e47d0be7fe53c1c8f67f370", - "sha256:291c9ce3929a75b45ce8ddde2aa7694fc8449f2bc8f5bd93adf021efaae2d10b", - "sha256:298e8b5d8087e0330aac211c85428c8761230ef46a1f2c516d6a2f67fb8803c5", - "sha256:2c7c4266c1b61eb429e8aeb7d8ed6a3bfe6c890a1788b18dbec090c35c6b93fa", - "sha256:2d68a8e8a3a816629283faf82358d8c93fe5bd974dd2704152394a3de4cec22a", - "sha256:344b89384c250ba6a4ce1786e04d01500e4dac0f4137ceebcaad12973c0ac0b3", - "sha256:3455ecc46ea443b5f7d9c2f946ce4017745e017b0d0f8b99c92564eff97e97f5", - "sha256:3d544a614055b131111bed6edfa1cb0fb082a7265761bcb03321f2dd7b5c6c48", - "sha256:3e5c26905aa651cc8c0ddc45e0e5dea2a1296f70bdc96af17aee9d0493280a17", - "sha256:3f5cc8c7bc99d2bbcd704cef165ca7d155cd6464c86cbda8339026a42d219397", - "sha256:4992266817169997854f81df7f6db7bdcda1609972d8ffd6919252f09ec3c0f6", - "sha256:4d55528ef13af4b4e074d067977b1f61408602f53ae4537dccf42ba665c2c7bd", - "sha256:576da63eae7809f375932bfcbca2cf20620a1915bf2fedce4b9cc8491eceefe3", - "sha256:58fc4d66ee349a23dbf08c7e964120dc9027059566e29cf0ce6205d590ed7eca", - "sha256:5b9bf77008f2c55dabbd099fd3ac87009471d223a1c7ebea36873d39511b780a", - "sha256:5e7996aed3f65667c6dcc8302a69368435a87c2364079a066750a2eac75ea01e", - "sha256:5f7487be65b9c2c510819e744e375bd41b929a97e5915c4852a82fbb085df62c", 
- "sha256:6388e4e95a26717b94a05ced084e19da4d92aca883f392dffcf8e48c8e221a24", - "sha256:65af12f70355de29e1092f319f85a3467f4005e959ab65129cb697169ce94b86", - "sha256:668d2b45d62c68c7a370ac3dce108ffda482b0a0f50abd8b4c604a813a59e08f", - "sha256:71333c22f7cf5f0480b59a0aef21f652cf9bbaa9679ad261b405b65a57511d1e", - "sha256:7150b83b3e3ddaac81a8bb6a9b5f93117674a0e7a2b5a5b32ab31fdfea6df27f", - "sha256:748e472345c3a82cfb462d0dff998a7bf43e621eed73374cb19f307e97e08a83", - "sha256:75dbfd41a61bc1fb0536bf7b1abf272dc115c53d4d77db770cd65d46d4520882", - "sha256:7618a082c55cf038eede4a918c1001cc8a4411dfe508dc762659bcd48d8f4c6e", - "sha256:780fcb855be29153901c67fc9c5633d48aebef21b90aa72812fa181d731c6b00", - "sha256:78d10c431073dc6ebceed35ab22948a016cc2b5120963c13a41e38bdde4a7212", - "sha256:7a3a3d3e4f1e3cd2a67b93a0b6ed0f2499e33f47cc568e3a0023e405abdc0ff1", - "sha256:7b6975d3763d0952c111700c0634968419268e6bbc0b55fe71138987fa66f309", - "sha256:80772e3bda6787510d9620bc0c7572be404a922f8ccdfd436bf6c3778119464c", - "sha256:80992eb20755701753e30a6952a96aa58f353d12a65ad3c9d48a8da5ec4690cf", - "sha256:841128a22e6ac04070a0f84776d07e9c38c4dcce8e28792a95e45fc621605517", - "sha256:861d25ae0985a1dd5297fee35f476b60c6029e2e6e19847d5b4d0a43a390b696", - "sha256:872f3dcaa8bf2245944861d7311179d2c0c9b2aaa7d3b464d99a7c2e401f01fa", - "sha256:87c93b25d538c433fb053da6228c6290117ba53ff6a537c133b0f2087948a582", - "sha256:8856aa76839dc234d3469f1e270918ce6bec1d6a601eba928f45d68a15f04fc3", - "sha256:885e023e73ce09b11b89ab91fc60f35d80878d2c19d6213a32b42ff36543c291", - "sha256:899b5e7e2d5a8bc92aa533c2d4e55e5ebba095c485568a5e4bedbc163421259a", - "sha256:8ce8caa29ebbdcde67e5fd652c811d34bc01f249dbc0d61e5cc4db05ae79a83b", - "sha256:8e1c68303ccf7fceb50fbab79064a2636119fd9aca121f28453709283dbca727", - "sha256:8e7e2b3577e97fa43c2c2b12a16139b2cedbd0770235d5179c0412b4794efd9b", - "sha256:92f05fc7d832e970047662b3440b190d24ea04f8d3c760e33e7163b67308c878", - 
"sha256:97f5811df21703446b42303475b8b855ee07d6ab6cdf8565eff115540624f25d", - "sha256:9affee8cb1ec453382c27eb9043378ab32f49cd4bc24a24275f5c39bf186c279", - "sha256:a2da4a8c6d465fde36cea7d54bf47b5cf089073452f0e47c8632ecb9dec23c07", - "sha256:a6903cdca64f1e301af9be424798328c1fe3b4b14aede35f04510989fc72f012", - "sha256:a8ab1adf04ae2d6d65835995218fd3f3eb644fe20655ca8ee233e2c7270ff53b", - "sha256:a8edd467551c1102dc0f5754ab55cd0703431cd3044edf8c8e7d9208d63fa453", - "sha256:ac00c41dd315d147b129976204839ca9de699d83519ff1272afbe4fb9d362d12", - "sha256:ad277f74b1c164f7248afa968700e410651eb858d7c160d109fb451dc45a2f09", - "sha256:ae46a50d235f1631d9ec4670503f7b30405103034830bc13df29fd947207f795", - "sha256:afe6b5a04b2ab1aa89bad32ca47bf71358e7302a06fdfdad857389dca8fb5f04", - "sha256:b1cb078f54af0abd835ca76f93a3152565b73be0f056264da45117d0adf5e99c", - "sha256:b25136212a3d064a8f0b9ebbb6c57094c5229e0de76d15c79b76feff26aeb7b8", - "sha256:b3226b246facae14909b465061ddcfa2dfeadb6a64f407f24300d42d69bcb1a1", - "sha256:b98e75b21fc2ba5285aef8efaf34131d16af1c38df36bdca2f50634bea2d3060", - "sha256:bbd7b24d108509a1b9b6679fcc1166a7dd031dbef1f3c2c73788f42e3ebb3beb", - "sha256:bed57543c99249ab3a4586ddc8786529fbc33309e5e8a1351802a06ca2baf4c2", - "sha256:c0583f69522732bdd79dca4cd3873e63a29acf4a299769c7541f2ca1e4dd4bc6", - "sha256:c1e0e9916301e3b3d970814b1439ca59487f0616d30f36a44cead66ee1748c31", - "sha256:c651847545422c8131660704c58606d841e228ed576c8f1666d98b3d318f89da", - "sha256:c7853f27195598e550fe089f78f0732c66ee1d1f0eaae8ad081589a5a2f5d4af", - "sha256:cbae50d352e4717ffc22c566afc2d0da744380e87ed44a144508e3fb9114a3f4", - "sha256:cdbed8f21204398f47de39b0a9b180d7e571f02dfb18bf5f1b618e238454b685", - "sha256:d08395595c42bcd82c3608762ce734504c6d025eef1c06f42326a6023a584186", - "sha256:d4639111e73997567343df6551da9dd90d66aece1b9fc26c786d328439488103", - "sha256:d63787f289944cc4bde518ad2b5e70a4f0d6e2ce76324635359c74c113fd188f", - "sha256:d6d5f061f6a2aa55790b9e64a23dfd87b6664ab56e24cd06c78eb43986cb260b", 
- "sha256:d7865df1fb564092bcf46dac61b5def25342faf6352e4bc0e61a286e3fa26a3d", - "sha256:db6585b600b2e76e98131e0ac0e5195759082b51687ad0c94505970c90718f4a", - "sha256:e36d7369363d2707d5f68950a64c4e025991eb0177db01ccb6aa6facae48b69f", - "sha256:e7947d9a6264c727a556541b1630296bbd5d0a05068d21c38dde8e7a1c703ef0", - "sha256:eb2d59bc196e6d3b1827c7db06c1a898bfa0787c0574af398e65ccf2e97c0fbe", - "sha256:ee9c2f6ca9774c2c24bbf7b23086264e6b5fa178201450535ec0859739e6f78d", - "sha256:f4760e1b02173f4155203054f77a5dc0b4078de7645c922b208d28e7eb99f3e2", - "sha256:f70bec8a14a692be6dbe7ce8aab303e88df891cbd4a39af091f90b6702e28055", - "sha256:f869e34d2326e417baee430ae998e91412cc8e7fdd83d979277a90a0e79a5b47", - "sha256:f8b9a7cd381970e64849070aca7c32d53ab7d96c66db6c2ef7aa23c6e803f514", - "sha256:f99d74ddf9d3b6126b509e81865f89bd1283e3fc1b568b68cd7bd9dfa15583d7", - "sha256:f9e7e493ded7042712a374471203dd43ae3fff5b81e3de1a0513fa241af9fd41", - "sha256:fc72ae476732cdb7b2c1acb5af23b478b8a0d4b6fcf19b90dd150291e0d5b26b", - "sha256:fccbf0cd3411719e4c9426755df90bf3449d9fc5a89f077f4a7f1abd4f70c910", - "sha256:ffcf18ad3edf1c170e27e88b10282a2c449aa0358659592462448d71b2000cfc" + "sha256:015de2ce2af1586ff5dc873e804434185199a15f7d96920ce67e50604592cae9", + "sha256:061c3ff1f51ecec256e916cf71cc01f9975af8fb3af9b94d3c0cc8702cfea637", + "sha256:08a80cf4884920863623a9ee9a285ee04cef57ebedc1cc87b3e3e0f24c8acfe5", + "sha256:09362f86ec201288d5687d1dc476b07bf39c08478cde837cb710b302864e7ec9", + "sha256:0bb4f48bd0dd18eebe826395e6a48b7331291078a879295bae4e5d053be50d4c", + "sha256:106af1653007cc569d5fbb5f08c6648a49fe4de74c2df814e234e282ebc06957", + "sha256:11fdd1192240dda8d6c5d18a06146e9045cb7e3ba7c06de6973000ff035df7c6", + "sha256:16a472300bc6c83fe4c2072cc22b3972f90d718d56f241adabc7ae509f53f154", + "sha256:176287bb998fd1e9846a9b666e240e58f8d3373e3bf87e7642f15af5405187b8", + "sha256:177914f81f66c86c012311f8c7f46887ec375cfcfd2a2f28233a3053ac93a569", + 
"sha256:177c9dd834cdf4dc39c27436ade6fdf9fe81484758885f2d616d5d03c0a83bd2", + "sha256:187700668c018a7e76e89424b7c1042f317c8df9161f00c0c903c82b0a8cac5c", + "sha256:1d9b5ee46dcb498fa3e46d4dfabcb531e1f2e76b477e0d99ef114f17bbd38453", + "sha256:22da15b902f9f8e267020d1c8bcfc4831ca646fecb60254f7bc71763569f56b1", + "sha256:24cd91a03543a0f8d09cb18d1cb27df80a84b5553d2bd94cba5979ef6af5c6e7", + "sha256:255f1a10ae39b52122cce26ce0781f7a616f502feecce9e616976f6a87992d6b", + "sha256:271c360fdc464fe6a75f13ea0c08ddf71a321f4c55fc20a3fe62ea3ef09df7d9", + "sha256:2ed83d53a8c5902ec48b90b2ac045e28e1698c0bea9441af9409fc844dc79496", + "sha256:2f3e1867dd574014253b4b8f01ba443b9c914e61d45f3674e452a915d6e929a3", + "sha256:35fbd23c1c8732cde7a94abe7fb071ec173c2f58c0bd0d7e5b669fdfc80a2c7b", + "sha256:37d0c59548ae56fae01c14998918d04ee0d5d3277363c10208eef8c4e2b68ed6", + "sha256:39d05e65f23a0fe897b6ac395f2a8d48c56ac0f583f5d663e0afec1da89b95da", + "sha256:3ad59efe24a4d54c2742929001f2d02803aafc15d6d781c21379e3f7f66ec842", + "sha256:3aed39db2f0ace76faa94f465d4234aac72e2f32b009f15da6492a561b3bbebd", + "sha256:3bbac1953c17252f9cc675bb19372444aadf0179b5df575ac4b56faaec9f6294", + "sha256:40bc802a696887b14c002edd43c18082cb7b6f9ee8b838239b03b56574d97f71", + "sha256:42f712b4668831c0cd85e0a5b5a308700fe068e37dcd24c0062904c4e372b093", + "sha256:448a66b8266de0b581246ca7cd6a73b8d98d15100fb7165974535fa3b577340e", + "sha256:485301ee56ce87a51ccb182a4b180d852c5cb2b3cb3a82f7d4714b4141119d8c", + "sha256:485747ee62da83366a44fbba963c5fe017860ad408ccd6cd99aa66ea80d32b2e", + "sha256:4cf0855a842c5b5c391dd32ca273b09e86abf8367572073bd1edfc52bc44446b", + "sha256:4eca20917a06d2fca7628ef3c8b94a8c358f6b43f1a621c9815243462dcccf97", + "sha256:4ed172d0c79f156c1b954e99c03bc2e3033c17efce8dd1a7c781bc4d5793dfac", + "sha256:5267cfda873ad62591b9332fd9472d2409f7cf02a34a9c9cb367e2c0255994bf", + "sha256:52b5cbc0469328e58180021138207e6ec91d7ca2e037d3549cc9e34e2187330a", + "sha256:53d7a3cd46cdc1689296348cb05ffd4f4280035770aee0c8ead3bbd4d6529acc", 
+ "sha256:563646d74a4b4456d0cf3b714ca522e725243c603e8254ad85c3b59b7c0c4bf0", + "sha256:570cc326e78ff23dec7f41487aa9c3dffd02e5ee9ab43a8f6ccc3df8f9327623", + "sha256:5aca759ada6b1967fcfd4336dcf460d02a8a23e6abe06e90ea7881e5c22c4de6", + "sha256:5de11c041486681ce854c814844f4ce3282b6ea1656faae19208ebe09d31c5b8", + "sha256:5e271dd97c7bb8eefda5cca38cd0b0373a1fea50f71e8071376b46968582af9b", + "sha256:642ed0a209ced4be3a46f8cb094f2d76f1f479e2a1ceca6de6346a096cd3409d", + "sha256:6446002739ca29249f0beaaf067fcbc2b5aab4bc7ee8fb941bd194947ce19aff", + "sha256:691d50c99a937709ac4c4cd570d959a006bd6a6d970a484c84cc99543d4a5bbb", + "sha256:69b857a7d8bd4f5d6e0db4086da8c46309a26e8cefdfc778c0c5cc17d4b11e08", + "sha256:6ac3fefb0d168c7c6cab24fdfc80ec62cd2b4dfd9e65b84bdceb1cb01d385c33", + "sha256:6c9141af27a4e5819d74d67d227d5047a20fa3c7d4d9df43037a955b4c748ec5", + "sha256:7170cbde4070dc3c77dec82abf86f3b210633d4f89550fa0ad2d4b549a05572a", + "sha256:763ad59e105fca09705d9f9b29ecffb95ecdc3b0363be3bb56081b2c6de7977a", + "sha256:77076bdc8776a2b029e1e6ffbe6d7056e35f56f5e80d9dc0bad26ad4a024a762", + "sha256:7cd020b1fb41e3ab7716d4d2c3972d4588fdfbab9bfbbb64acc7078eccef8860", + "sha256:821392559d37759caa67d622d0d2994c7a3f2fb29274948ac799d496d92bca73", + "sha256:829e91f3a8574888b73e7a3feb3b1af698e717513597e23136ff4eba0bc8387a", + "sha256:850c272e0e0d1a5c5d73b1b7871b0a7c2446b304cec55ccdb3eaac0d792bb065", + "sha256:87d9b206b1bd7a0523375dc2020a6ce88bca5330682ae2fe25e86fd5d45cea9c", + "sha256:8bd01ff4032abaed03f2db702fa9a61078bee37add0bd884a6190b05e63b028c", + "sha256:8d54bbdf5d56e2c8cf81a1857250f3ea132de77af543d0ba5dce667183b61fec", + "sha256:8efaeb08ede95066da3a3e3c420fcc0a21693fcd0c4396d0585b019613d28515", + "sha256:8f94fdd756ba1f79f988855d948ae0bad9ddf44df296770d9a58c774cfbcca72", + "sha256:95cde244e7195b2c07ec9b73fa4c5026d4a27233451485caa1cd0c1b55f26dbd", + "sha256:975382d9aa90dc59253d6a83a5ca72e07f4ada3ae3d6c0575ced513db322b8ec", + 
"sha256:9dd9d9d9e898b9d30683bdd2b6c1849449158647d1049a125879cb397ee9cd12", + "sha256:a019a344312d0b1f429c00d49c3be62fa273d4a1094e1b224f403716b6d03be1", + "sha256:a4d9bfda3f84fc563868fe25ca160c8ff0e69bc4443c5647f960d59400ce6557", + "sha256:a657250807b6efd19b28f5922520ae002a54cb43c2401e6f3d0230c352564d25", + "sha256:a771417c9c06c56c9d53d11a5b084d1de75de82978e23c544270ab25e7c066ff", + "sha256:aad6ed9e70ddfb34d849b761fb243be58c735be6a9265b9060d6ddb77751e3e8", + "sha256:ae87137951bb3dc08c7d8bfb8988d8c119f3230731b08a71146e84aaa919a7a9", + "sha256:af247fd4f12cca4129c1b82090244ea5a9d5bb089e9a82feb5a2f7c6a9fe181d", + "sha256:b5d4bdd697195f3876d134101c40c7d06d46c6ab25159ed5cbd44105c715278a", + "sha256:b9255e7165083de7c1d605e818025e8860636348f34a79d84ec533546064f07e", + "sha256:c22211c165166de6683de8136229721f3d5c8606cc2c3d1562da9a3a5058049c", + "sha256:c55f9821f88e8bee4b7a72c82cfb5ecd22b6aad04033334f33c329b29bfa4da0", + "sha256:c7aed97f2e676561416c927b063802c8a6285e9b55e1b83213dfd99a8f4f9e48", + "sha256:cd2163f42868865597d89399a01aa33b7594ce8e2c4a28503127c81a2f17784e", + "sha256:ce5e7504db95b76fc89055c7f41e367eaadef5b1d059e27e1d6eabf2b55ca314", + "sha256:cff7351c251c7546407827b6a37bcef6416304fc54d12d44dbfecbb717064717", + "sha256:d27aa6bbc1f33be920bb7adbb95581452cdf23005d5611b29a12bb6a3468cc95", + "sha256:d3b52a67ac66a3a64a7e710ba629f62d1e26ca0504c29ee8cbd99b97df7079a8", + "sha256:de61e424062173b4f70eec07e12469edde7e17fa180019a2a0d75c13a5c5dc57", + "sha256:e10e6a1ed2b8661201e79dff5531f8ad4cdd83548a0f81c95cf79b3184b20c33", + "sha256:e1a0ffc39f51aa5f5c22114a8f1906b3c17eba68c5babb86c5f77d8b1bba14d1", + "sha256:e22491d25f97199fc3581ad8dd8ce198d8c8fdb8dae80dea3512e1ce6d5fa99f", + "sha256:e626b864725680cd3904414d72e7b0bd81c0e5b2b53a5b30b4273034253bb41f", + "sha256:e8c71ea77536149e36c4c784f6d420ffd20bea041e3ba21ed021cb40ce58e2c9", + "sha256:e8d0f0eca087630d58b8c662085529781fd5dc80f0a54eda42d5c9029f812599", + "sha256:ea65b59882d5fa8c74a23f8960db579e5e341534934f43f3b18ec1839b893e41", 
+ "sha256:ea93163472db26ac6043e8f7f93a05d9b59e0505c760da2a3cd22c7dd7111391", + "sha256:eab75a8569a095f2ad470b342f2751d9902f7944704f0571c8af46bede438475", + "sha256:ed8313809571a5463fd7db43aaca68ecb43ca7a58f5b23b6e6c6c5d02bdc7882", + "sha256:ef5fddfb264e89c435be4adb3953cef5d2936fdeb4463b4161a6ba2f22e7b740", + "sha256:ef750a20de1b65657a1425f77c525b0183eac63fe7b8f5ac0dd16f3668d3e64f", + "sha256:efb9ece97e696bb56e31166a9dd7919f8f0c6b31967b454718c6509f29ef6fee", + "sha256:f4c179a7aeae10ddf44c6bac87938134c1379c49c884529f090f9bf05566c836", + "sha256:f602881d80ee4228a2355c68da6b296a296cd22bbb91e5418d54577bbf17fa7c", + "sha256:fc2200e79d75b5238c8d69f6a30f8284290c777039d331e7340b6c17cad24a5a", + "sha256:fcc1ebb7561a3e24a6588f7c6ded15d80aec22c66a070c757559b57b17ffd1cb" ], "markers": "python_version >= '3.8'", - "version": "==0.10.0" + "version": "==0.10.3" }, "s3transfer": { "hashes": [ @@ -483,11 +491,11 @@ "falcon" ], "hashes": [ - "sha256:2e53ad63f96bb9da6570ba2e755c267e529edcf58580a2c0d2a11ef26e1e678b", - "sha256:7dc873b87e1faf4d00614afd1058bfa1522942f33daef8a59f90de8ed75cd10c" + "sha256:64a7141005fb775b9db298a30de93e3b83e0ddd1232dc6f36eb38aebc1553291", + "sha256:6de2e88304873484207fed836388e422aeff000609b104c802749fd89d56ba5b" ], "index": "pypi", - "version": "==1.30.0" + "version": "==1.31.0" }, "six": { "hashes": [ @@ -499,11 +507,11 @@ }, "tldextract": { "hashes": [ - "sha256:581e7dbefc90e7bb857bb6f768d25c811a3c5f0892ed56a9a2999ddb7b1b70c2", - "sha256:5fe3210c577463545191d45ad522d3d5e78d55218ce97215e82004dcae1e1234" + "sha256:2cb271ca8d06ea1630a1361b58edad14e0cf81f34ce3c90b052854528fe2a281", + "sha256:4df1c65b95be61d59428e8611e955e54e6f1d4483d3e8d5733d3a9062155e910" ], "index": "pypi", - "version": "==3.4.4" + "version": "==3.5.0" }, "urllib3": { "hashes": [ @@ -515,20 +523,12 @@ } }, "develop": { - "appnope": { - "hashes": [ - "sha256:02bd91c4de869fbb1e1c50aafc4098827a7a54ab2f39d9dcba6c9547ed920e24", - 
"sha256:265a455292d0bd8a72453494fa24df5a11eb18373a60c7c0430889f22548605e" - ], - "markers": "sys_platform == 'darwin'", - "version": "==0.1.3" - }, "asttokens": { "hashes": [ - "sha256:4622110b2a6f30b77e1473affaa97e711bc2f07d3f10848420ff1898edbe94f3", - "sha256:6b0ac9e93fb0335014d382b8fa9b3afa7df546984258005da0b9e7095b3deb1c" + "sha256:2e0171b991b2c959acc6c49318049236844a5da1d65ba2672c4880c1c894834e", + "sha256:cf8fc9e61a86461aa9fb161a14a0841a03c405fa829ac6b202670b3495d2ce69" ], - "version": "==2.2.1" + "version": "==2.4.0" }, "attrs": { "hashes": [ @@ -674,7 +674,7 @@ "sha256:04505ade687dc26dc4284b1ad19a83be2f2afe83e7a828ace0c72f3a1df72aac", "sha256:9dffbe1d8acf91e3de75f3b544e4842382fc06c6babe903ac9acb74dc6e08d88" ], - "markers": "python_version >= '3.7'", + "markers": "python_full_version >= '3.7.0'", "version": "==3.0.39" }, "ptyprocess": { @@ -701,11 +701,11 @@ }, "pytest": { "hashes": [ - "sha256:78bf16451a2eb8c7a2ea98e32dc119fd2aa758f1d5d66dbf0a59d69a3969df32", - "sha256:b4bf8c45bd59934ed84001ad51e11b4ee40d40a1229d2c79f9c592b0a3f6bd8a" + "sha256:1d881c6124e08ff0a1bb75ba3ec0bfd8b5354a01c194ddd5a0a870a48d99b002", + "sha256:a766259cfab564a2ad52cb1aae1b881a75c3eb7e34ca3779697c23ed47c47069" ], "index": "pypi", - "version": "==7.4.0" + "version": "==7.4.2" }, "pytest-order": { "hashes": [ @@ -741,106 +741,106 @@ }, "rpds-py": { "hashes": [ - "sha256:00215f6a9058fbf84f9d47536902558eb61f180a6b2a0fa35338d06ceb9a2e5a", - "sha256:0028eb0967942d0d2891eae700ae1a27b7fd18604cfcb16a1ef486a790fee99e", - "sha256:0155c33af0676fc38e1107679be882077680ad1abb6303956b97259c3177e85e", - "sha256:063411228b852fb2ed7485cf91f8e7d30893e69b0acb207ec349db04cccc8225", - "sha256:0700c2133ba203c4068aaecd6a59bda22e06a5e46255c9da23cbf68c6942215d", - "sha256:08e08ccf5b10badb7d0a5c84829b914c6e1e1f3a716fdb2bf294e2bd01562775", - "sha256:0d292cabd7c8335bdd3237ded442480a249dbcdb4ddfac5218799364a01a0f5c", - "sha256:15932ec5f224b0e35764dc156514533a4fca52dcfda0dfbe462a1a22b37efd59", - 
"sha256:18f87baa20e02e9277ad8960cd89b63c79c05caf106f4c959a9595c43f2a34a5", - "sha256:1a6420a36975e0073acaeee44ead260c1f6ea56812cfc6c31ec00c1c48197173", - "sha256:1b401e8b9aece651512e62c431181e6e83048a651698a727ea0eb0699e9f9b74", - "sha256:1d7b7b71bcb82d8713c7c2e9c5f061415598af5938666beded20d81fa23e7640", - "sha256:23750a9b8a329844ba1fe267ca456bb3184984da2880ed17ae641c5af8de3fef", - "sha256:23a059143c1393015c68936370cce11690f7294731904bdae47cc3e16d0b2474", - "sha256:26d9fd624649a10e4610fab2bc820e215a184d193e47d0be7fe53c1c8f67f370", - "sha256:291c9ce3929a75b45ce8ddde2aa7694fc8449f2bc8f5bd93adf021efaae2d10b", - "sha256:298e8b5d8087e0330aac211c85428c8761230ef46a1f2c516d6a2f67fb8803c5", - "sha256:2c7c4266c1b61eb429e8aeb7d8ed6a3bfe6c890a1788b18dbec090c35c6b93fa", - "sha256:2d68a8e8a3a816629283faf82358d8c93fe5bd974dd2704152394a3de4cec22a", - "sha256:344b89384c250ba6a4ce1786e04d01500e4dac0f4137ceebcaad12973c0ac0b3", - "sha256:3455ecc46ea443b5f7d9c2f946ce4017745e017b0d0f8b99c92564eff97e97f5", - "sha256:3d544a614055b131111bed6edfa1cb0fb082a7265761bcb03321f2dd7b5c6c48", - "sha256:3e5c26905aa651cc8c0ddc45e0e5dea2a1296f70bdc96af17aee9d0493280a17", - "sha256:3f5cc8c7bc99d2bbcd704cef165ca7d155cd6464c86cbda8339026a42d219397", - "sha256:4992266817169997854f81df7f6db7bdcda1609972d8ffd6919252f09ec3c0f6", - "sha256:4d55528ef13af4b4e074d067977b1f61408602f53ae4537dccf42ba665c2c7bd", - "sha256:576da63eae7809f375932bfcbca2cf20620a1915bf2fedce4b9cc8491eceefe3", - "sha256:58fc4d66ee349a23dbf08c7e964120dc9027059566e29cf0ce6205d590ed7eca", - "sha256:5b9bf77008f2c55dabbd099fd3ac87009471d223a1c7ebea36873d39511b780a", - "sha256:5e7996aed3f65667c6dcc8302a69368435a87c2364079a066750a2eac75ea01e", - "sha256:5f7487be65b9c2c510819e744e375bd41b929a97e5915c4852a82fbb085df62c", - "sha256:6388e4e95a26717b94a05ced084e19da4d92aca883f392dffcf8e48c8e221a24", - "sha256:65af12f70355de29e1092f319f85a3467f4005e959ab65129cb697169ce94b86", - "sha256:668d2b45d62c68c7a370ac3dce108ffda482b0a0f50abd8b4c604a813a59e08f", 
- "sha256:71333c22f7cf5f0480b59a0aef21f652cf9bbaa9679ad261b405b65a57511d1e", - "sha256:7150b83b3e3ddaac81a8bb6a9b5f93117674a0e7a2b5a5b32ab31fdfea6df27f", - "sha256:748e472345c3a82cfb462d0dff998a7bf43e621eed73374cb19f307e97e08a83", - "sha256:75dbfd41a61bc1fb0536bf7b1abf272dc115c53d4d77db770cd65d46d4520882", - "sha256:7618a082c55cf038eede4a918c1001cc8a4411dfe508dc762659bcd48d8f4c6e", - "sha256:780fcb855be29153901c67fc9c5633d48aebef21b90aa72812fa181d731c6b00", - "sha256:78d10c431073dc6ebceed35ab22948a016cc2b5120963c13a41e38bdde4a7212", - "sha256:7a3a3d3e4f1e3cd2a67b93a0b6ed0f2499e33f47cc568e3a0023e405abdc0ff1", - "sha256:7b6975d3763d0952c111700c0634968419268e6bbc0b55fe71138987fa66f309", - "sha256:80772e3bda6787510d9620bc0c7572be404a922f8ccdfd436bf6c3778119464c", - "sha256:80992eb20755701753e30a6952a96aa58f353d12a65ad3c9d48a8da5ec4690cf", - "sha256:841128a22e6ac04070a0f84776d07e9c38c4dcce8e28792a95e45fc621605517", - "sha256:861d25ae0985a1dd5297fee35f476b60c6029e2e6e19847d5b4d0a43a390b696", - "sha256:872f3dcaa8bf2245944861d7311179d2c0c9b2aaa7d3b464d99a7c2e401f01fa", - "sha256:87c93b25d538c433fb053da6228c6290117ba53ff6a537c133b0f2087948a582", - "sha256:8856aa76839dc234d3469f1e270918ce6bec1d6a601eba928f45d68a15f04fc3", - "sha256:885e023e73ce09b11b89ab91fc60f35d80878d2c19d6213a32b42ff36543c291", - "sha256:899b5e7e2d5a8bc92aa533c2d4e55e5ebba095c485568a5e4bedbc163421259a", - "sha256:8ce8caa29ebbdcde67e5fd652c811d34bc01f249dbc0d61e5cc4db05ae79a83b", - "sha256:8e1c68303ccf7fceb50fbab79064a2636119fd9aca121f28453709283dbca727", - "sha256:8e7e2b3577e97fa43c2c2b12a16139b2cedbd0770235d5179c0412b4794efd9b", - "sha256:92f05fc7d832e970047662b3440b190d24ea04f8d3c760e33e7163b67308c878", - "sha256:97f5811df21703446b42303475b8b855ee07d6ab6cdf8565eff115540624f25d", - "sha256:9affee8cb1ec453382c27eb9043378ab32f49cd4bc24a24275f5c39bf186c279", - "sha256:a2da4a8c6d465fde36cea7d54bf47b5cf089073452f0e47c8632ecb9dec23c07", - 
"sha256:a6903cdca64f1e301af9be424798328c1fe3b4b14aede35f04510989fc72f012", - "sha256:a8ab1adf04ae2d6d65835995218fd3f3eb644fe20655ca8ee233e2c7270ff53b", - "sha256:a8edd467551c1102dc0f5754ab55cd0703431cd3044edf8c8e7d9208d63fa453", - "sha256:ac00c41dd315d147b129976204839ca9de699d83519ff1272afbe4fb9d362d12", - "sha256:ad277f74b1c164f7248afa968700e410651eb858d7c160d109fb451dc45a2f09", - "sha256:ae46a50d235f1631d9ec4670503f7b30405103034830bc13df29fd947207f795", - "sha256:afe6b5a04b2ab1aa89bad32ca47bf71358e7302a06fdfdad857389dca8fb5f04", - "sha256:b1cb078f54af0abd835ca76f93a3152565b73be0f056264da45117d0adf5e99c", - "sha256:b25136212a3d064a8f0b9ebbb6c57094c5229e0de76d15c79b76feff26aeb7b8", - "sha256:b3226b246facae14909b465061ddcfa2dfeadb6a64f407f24300d42d69bcb1a1", - "sha256:b98e75b21fc2ba5285aef8efaf34131d16af1c38df36bdca2f50634bea2d3060", - "sha256:bbd7b24d108509a1b9b6679fcc1166a7dd031dbef1f3c2c73788f42e3ebb3beb", - "sha256:bed57543c99249ab3a4586ddc8786529fbc33309e5e8a1351802a06ca2baf4c2", - "sha256:c0583f69522732bdd79dca4cd3873e63a29acf4a299769c7541f2ca1e4dd4bc6", - "sha256:c1e0e9916301e3b3d970814b1439ca59487f0616d30f36a44cead66ee1748c31", - "sha256:c651847545422c8131660704c58606d841e228ed576c8f1666d98b3d318f89da", - "sha256:c7853f27195598e550fe089f78f0732c66ee1d1f0eaae8ad081589a5a2f5d4af", - "sha256:cbae50d352e4717ffc22c566afc2d0da744380e87ed44a144508e3fb9114a3f4", - "sha256:cdbed8f21204398f47de39b0a9b180d7e571f02dfb18bf5f1b618e238454b685", - "sha256:d08395595c42bcd82c3608762ce734504c6d025eef1c06f42326a6023a584186", - "sha256:d4639111e73997567343df6551da9dd90d66aece1b9fc26c786d328439488103", - "sha256:d63787f289944cc4bde518ad2b5e70a4f0d6e2ce76324635359c74c113fd188f", - "sha256:d6d5f061f6a2aa55790b9e64a23dfd87b6664ab56e24cd06c78eb43986cb260b", - "sha256:d7865df1fb564092bcf46dac61b5def25342faf6352e4bc0e61a286e3fa26a3d", - "sha256:db6585b600b2e76e98131e0ac0e5195759082b51687ad0c94505970c90718f4a", - "sha256:e36d7369363d2707d5f68950a64c4e025991eb0177db01ccb6aa6facae48b69f", 
- "sha256:e7947d9a6264c727a556541b1630296bbd5d0a05068d21c38dde8e7a1c703ef0", - "sha256:eb2d59bc196e6d3b1827c7db06c1a898bfa0787c0574af398e65ccf2e97c0fbe", - "sha256:ee9c2f6ca9774c2c24bbf7b23086264e6b5fa178201450535ec0859739e6f78d", - "sha256:f4760e1b02173f4155203054f77a5dc0b4078de7645c922b208d28e7eb99f3e2", - "sha256:f70bec8a14a692be6dbe7ce8aab303e88df891cbd4a39af091f90b6702e28055", - "sha256:f869e34d2326e417baee430ae998e91412cc8e7fdd83d979277a90a0e79a5b47", - "sha256:f8b9a7cd381970e64849070aca7c32d53ab7d96c66db6c2ef7aa23c6e803f514", - "sha256:f99d74ddf9d3b6126b509e81865f89bd1283e3fc1b568b68cd7bd9dfa15583d7", - "sha256:f9e7e493ded7042712a374471203dd43ae3fff5b81e3de1a0513fa241af9fd41", - "sha256:fc72ae476732cdb7b2c1acb5af23b478b8a0d4b6fcf19b90dd150291e0d5b26b", - "sha256:fccbf0cd3411719e4c9426755df90bf3449d9fc5a89f077f4a7f1abd4f70c910", - "sha256:ffcf18ad3edf1c170e27e88b10282a2c449aa0358659592462448d71b2000cfc" + "sha256:015de2ce2af1586ff5dc873e804434185199a15f7d96920ce67e50604592cae9", + "sha256:061c3ff1f51ecec256e916cf71cc01f9975af8fb3af9b94d3c0cc8702cfea637", + "sha256:08a80cf4884920863623a9ee9a285ee04cef57ebedc1cc87b3e3e0f24c8acfe5", + "sha256:09362f86ec201288d5687d1dc476b07bf39c08478cde837cb710b302864e7ec9", + "sha256:0bb4f48bd0dd18eebe826395e6a48b7331291078a879295bae4e5d053be50d4c", + "sha256:106af1653007cc569d5fbb5f08c6648a49fe4de74c2df814e234e282ebc06957", + "sha256:11fdd1192240dda8d6c5d18a06146e9045cb7e3ba7c06de6973000ff035df7c6", + "sha256:16a472300bc6c83fe4c2072cc22b3972f90d718d56f241adabc7ae509f53f154", + "sha256:176287bb998fd1e9846a9b666e240e58f8d3373e3bf87e7642f15af5405187b8", + "sha256:177914f81f66c86c012311f8c7f46887ec375cfcfd2a2f28233a3053ac93a569", + "sha256:177c9dd834cdf4dc39c27436ade6fdf9fe81484758885f2d616d5d03c0a83bd2", + "sha256:187700668c018a7e76e89424b7c1042f317c8df9161f00c0c903c82b0a8cac5c", + "sha256:1d9b5ee46dcb498fa3e46d4dfabcb531e1f2e76b477e0d99ef114f17bbd38453", + 
"sha256:22da15b902f9f8e267020d1c8bcfc4831ca646fecb60254f7bc71763569f56b1", + "sha256:24cd91a03543a0f8d09cb18d1cb27df80a84b5553d2bd94cba5979ef6af5c6e7", + "sha256:255f1a10ae39b52122cce26ce0781f7a616f502feecce9e616976f6a87992d6b", + "sha256:271c360fdc464fe6a75f13ea0c08ddf71a321f4c55fc20a3fe62ea3ef09df7d9", + "sha256:2ed83d53a8c5902ec48b90b2ac045e28e1698c0bea9441af9409fc844dc79496", + "sha256:2f3e1867dd574014253b4b8f01ba443b9c914e61d45f3674e452a915d6e929a3", + "sha256:35fbd23c1c8732cde7a94abe7fb071ec173c2f58c0bd0d7e5b669fdfc80a2c7b", + "sha256:37d0c59548ae56fae01c14998918d04ee0d5d3277363c10208eef8c4e2b68ed6", + "sha256:39d05e65f23a0fe897b6ac395f2a8d48c56ac0f583f5d663e0afec1da89b95da", + "sha256:3ad59efe24a4d54c2742929001f2d02803aafc15d6d781c21379e3f7f66ec842", + "sha256:3aed39db2f0ace76faa94f465d4234aac72e2f32b009f15da6492a561b3bbebd", + "sha256:3bbac1953c17252f9cc675bb19372444aadf0179b5df575ac4b56faaec9f6294", + "sha256:40bc802a696887b14c002edd43c18082cb7b6f9ee8b838239b03b56574d97f71", + "sha256:42f712b4668831c0cd85e0a5b5a308700fe068e37dcd24c0062904c4e372b093", + "sha256:448a66b8266de0b581246ca7cd6a73b8d98d15100fb7165974535fa3b577340e", + "sha256:485301ee56ce87a51ccb182a4b180d852c5cb2b3cb3a82f7d4714b4141119d8c", + "sha256:485747ee62da83366a44fbba963c5fe017860ad408ccd6cd99aa66ea80d32b2e", + "sha256:4cf0855a842c5b5c391dd32ca273b09e86abf8367572073bd1edfc52bc44446b", + "sha256:4eca20917a06d2fca7628ef3c8b94a8c358f6b43f1a621c9815243462dcccf97", + "sha256:4ed172d0c79f156c1b954e99c03bc2e3033c17efce8dd1a7c781bc4d5793dfac", + "sha256:5267cfda873ad62591b9332fd9472d2409f7cf02a34a9c9cb367e2c0255994bf", + "sha256:52b5cbc0469328e58180021138207e6ec91d7ca2e037d3549cc9e34e2187330a", + "sha256:53d7a3cd46cdc1689296348cb05ffd4f4280035770aee0c8ead3bbd4d6529acc", + "sha256:563646d74a4b4456d0cf3b714ca522e725243c603e8254ad85c3b59b7c0c4bf0", + "sha256:570cc326e78ff23dec7f41487aa9c3dffd02e5ee9ab43a8f6ccc3df8f9327623", + "sha256:5aca759ada6b1967fcfd4336dcf460d02a8a23e6abe06e90ea7881e5c22c4de6", 
+ "sha256:5de11c041486681ce854c814844f4ce3282b6ea1656faae19208ebe09d31c5b8", + "sha256:5e271dd97c7bb8eefda5cca38cd0b0373a1fea50f71e8071376b46968582af9b", + "sha256:642ed0a209ced4be3a46f8cb094f2d76f1f479e2a1ceca6de6346a096cd3409d", + "sha256:6446002739ca29249f0beaaf067fcbc2b5aab4bc7ee8fb941bd194947ce19aff", + "sha256:691d50c99a937709ac4c4cd570d959a006bd6a6d970a484c84cc99543d4a5bbb", + "sha256:69b857a7d8bd4f5d6e0db4086da8c46309a26e8cefdfc778c0c5cc17d4b11e08", + "sha256:6ac3fefb0d168c7c6cab24fdfc80ec62cd2b4dfd9e65b84bdceb1cb01d385c33", + "sha256:6c9141af27a4e5819d74d67d227d5047a20fa3c7d4d9df43037a955b4c748ec5", + "sha256:7170cbde4070dc3c77dec82abf86f3b210633d4f89550fa0ad2d4b549a05572a", + "sha256:763ad59e105fca09705d9f9b29ecffb95ecdc3b0363be3bb56081b2c6de7977a", + "sha256:77076bdc8776a2b029e1e6ffbe6d7056e35f56f5e80d9dc0bad26ad4a024a762", + "sha256:7cd020b1fb41e3ab7716d4d2c3972d4588fdfbab9bfbbb64acc7078eccef8860", + "sha256:821392559d37759caa67d622d0d2994c7a3f2fb29274948ac799d496d92bca73", + "sha256:829e91f3a8574888b73e7a3feb3b1af698e717513597e23136ff4eba0bc8387a", + "sha256:850c272e0e0d1a5c5d73b1b7871b0a7c2446b304cec55ccdb3eaac0d792bb065", + "sha256:87d9b206b1bd7a0523375dc2020a6ce88bca5330682ae2fe25e86fd5d45cea9c", + "sha256:8bd01ff4032abaed03f2db702fa9a61078bee37add0bd884a6190b05e63b028c", + "sha256:8d54bbdf5d56e2c8cf81a1857250f3ea132de77af543d0ba5dce667183b61fec", + "sha256:8efaeb08ede95066da3a3e3c420fcc0a21693fcd0c4396d0585b019613d28515", + "sha256:8f94fdd756ba1f79f988855d948ae0bad9ddf44df296770d9a58c774cfbcca72", + "sha256:95cde244e7195b2c07ec9b73fa4c5026d4a27233451485caa1cd0c1b55f26dbd", + "sha256:975382d9aa90dc59253d6a83a5ca72e07f4ada3ae3d6c0575ced513db322b8ec", + "sha256:9dd9d9d9e898b9d30683bdd2b6c1849449158647d1049a125879cb397ee9cd12", + "sha256:a019a344312d0b1f429c00d49c3be62fa273d4a1094e1b224f403716b6d03be1", + "sha256:a4d9bfda3f84fc563868fe25ca160c8ff0e69bc4443c5647f960d59400ce6557", + 
"sha256:a657250807b6efd19b28f5922520ae002a54cb43c2401e6f3d0230c352564d25", + "sha256:a771417c9c06c56c9d53d11a5b084d1de75de82978e23c544270ab25e7c066ff", + "sha256:aad6ed9e70ddfb34d849b761fb243be58c735be6a9265b9060d6ddb77751e3e8", + "sha256:ae87137951bb3dc08c7d8bfb8988d8c119f3230731b08a71146e84aaa919a7a9", + "sha256:af247fd4f12cca4129c1b82090244ea5a9d5bb089e9a82feb5a2f7c6a9fe181d", + "sha256:b5d4bdd697195f3876d134101c40c7d06d46c6ab25159ed5cbd44105c715278a", + "sha256:b9255e7165083de7c1d605e818025e8860636348f34a79d84ec533546064f07e", + "sha256:c22211c165166de6683de8136229721f3d5c8606cc2c3d1562da9a3a5058049c", + "sha256:c55f9821f88e8bee4b7a72c82cfb5ecd22b6aad04033334f33c329b29bfa4da0", + "sha256:c7aed97f2e676561416c927b063802c8a6285e9b55e1b83213dfd99a8f4f9e48", + "sha256:cd2163f42868865597d89399a01aa33b7594ce8e2c4a28503127c81a2f17784e", + "sha256:ce5e7504db95b76fc89055c7f41e367eaadef5b1d059e27e1d6eabf2b55ca314", + "sha256:cff7351c251c7546407827b6a37bcef6416304fc54d12d44dbfecbb717064717", + "sha256:d27aa6bbc1f33be920bb7adbb95581452cdf23005d5611b29a12bb6a3468cc95", + "sha256:d3b52a67ac66a3a64a7e710ba629f62d1e26ca0504c29ee8cbd99b97df7079a8", + "sha256:de61e424062173b4f70eec07e12469edde7e17fa180019a2a0d75c13a5c5dc57", + "sha256:e10e6a1ed2b8661201e79dff5531f8ad4cdd83548a0f81c95cf79b3184b20c33", + "sha256:e1a0ffc39f51aa5f5c22114a8f1906b3c17eba68c5babb86c5f77d8b1bba14d1", + "sha256:e22491d25f97199fc3581ad8dd8ce198d8c8fdb8dae80dea3512e1ce6d5fa99f", + "sha256:e626b864725680cd3904414d72e7b0bd81c0e5b2b53a5b30b4273034253bb41f", + "sha256:e8c71ea77536149e36c4c784f6d420ffd20bea041e3ba21ed021cb40ce58e2c9", + "sha256:e8d0f0eca087630d58b8c662085529781fd5dc80f0a54eda42d5c9029f812599", + "sha256:ea65b59882d5fa8c74a23f8960db579e5e341534934f43f3b18ec1839b893e41", + "sha256:ea93163472db26ac6043e8f7f93a05d9b59e0505c760da2a3cd22c7dd7111391", + "sha256:eab75a8569a095f2ad470b342f2751d9902f7944704f0571c8af46bede438475", + "sha256:ed8313809571a5463fd7db43aaca68ecb43ca7a58f5b23b6e6c6c5d02bdc7882", 
+ "sha256:ef5fddfb264e89c435be4adb3953cef5d2936fdeb4463b4161a6ba2f22e7b740", + "sha256:ef750a20de1b65657a1425f77c525b0183eac63fe7b8f5ac0dd16f3668d3e64f", + "sha256:efb9ece97e696bb56e31166a9dd7919f8f0c6b31967b454718c6509f29ef6fee", + "sha256:f4c179a7aeae10ddf44c6bac87938134c1379c49c884529f090f9bf05566c836", + "sha256:f602881d80ee4228a2355c68da6b296a296cd22bbb91e5418d54577bbf17fa7c", + "sha256:fc2200e79d75b5238c8d69f6a30f8284290c777039d331e7340b6c17cad24a5a", + "sha256:fcc1ebb7561a3e24a6588f7c6ded15d80aec22c66a070c757559b57b17ffd1cb" ], "markers": "python_version >= '3.8'", - "version": "==0.10.0" + "version": "==0.10.3" }, "six": { "hashes": [ @@ -867,11 +867,11 @@ }, "traitlets": { "hashes": [ - "sha256:9e6ec080259b9a5940c797d58b613b5e31441c2257b87c2e795c5228ae80d2d8", - "sha256:f6cde21a9c68cf756af02035f72d5a723bf607e862e7be33ece505abf4a3bad9" + "sha256:417745a96681fbb358e723d5346a547521f36e9bd0d50ba7ab368fff5d67aa54", + "sha256:f584ea209240466e66e91f3c81aa7d004ba4cf794990b0c775938a1544217cd1" ], - "markers": "python_version >= '3.7'", - "version": "==5.9.0" + "markers": "python_version >= '3.8'", + "version": "==5.10.0" }, "wcwidth": { "hashes": [ diff --git a/ingestion_server/ingestion_server/es_helpers.py b/ingestion_server/ingestion_server/es_helpers.py index 98a70119b47..5f0e3280794 100644 --- a/ingestion_server/ingestion_server/es_helpers.py +++ b/ingestion_server/ingestion_server/es_helpers.py @@ -2,10 +2,9 @@ import time from typing import NamedTuple -from aws_requests_auth.aws_auth import AWSRequestsAuth from decouple import config from elasticsearch import ConnectionError as EsConnectionError -from elasticsearch import Elasticsearch, NotFoundError, RequestsHttpConnection +from elasticsearch import Elasticsearch, NotFoundError class Stat(NamedTuple): @@ -45,30 +44,16 @@ def _elasticsearch_connect() -> Elasticsearch: :return: an Elasticsearch client """ - elasticsearch_url = config("ELASTICSEARCH_URL", default="localhost") - elasticsearch_port = 
config("ELASTICSEARCH_PORT", default=9200, cast=int) + es_scheme = config("ELASTICSEARCH_SCHEME", default="http://") + es_url = config("ELASTICSEARCH_URL", default="localhost") + es_port = config("ELASTICSEARCH_PORT", default=9200, cast=int) - # For AWS IAM access to Elasticsearch - aws_region = config("AWS_REGION", "us-east-1") - aws_access_key_id = config("AWS_ACCESS_KEY_ID", default="") - aws_secret_access_key = config("AWS_SECRET_ACCESS_KEY", default="") + es_endpoint = f"{es_scheme}{es_url}:{es_port}" timeout = 12 # hours - log.info(f"Connecting to {elasticsearch_url}:{elasticsearch_port} with AWS auth") - auth = AWSRequestsAuth( - aws_access_key=aws_access_key_id, - aws_secret_access_key=aws_secret_access_key, - aws_host=elasticsearch_url, - aws_region=aws_region, - aws_service="es", - ) - auth.encode = lambda x: bytes(x.encode("utf-8")) es = Elasticsearch( - host=elasticsearch_url, - port=elasticsearch_port, - connection_class=RequestsHttpConnection, - http_auth=auth, + es_endpoint, timeout=timeout * 3600, # seconds ) es.info() diff --git a/ingestion_server/test/integration_test.py b/ingestion_server/test/integration_test.py index 3c9678143dd..6a71fdcc196 100644 --- a/ingestion_server/test/integration_test.py +++ b/ingestion_server/test/integration_test.py @@ -19,7 +19,7 @@ # Uses Bottle because, unlike Falcon, it can be run from within the test suite. 
from bottle import Bottle -from elasticsearch import Elasticsearch, NotFoundError, RequestsHttpConnection +from elasticsearch import Elasticsearch, NotFoundError from .gen_integration_compose import gen_integration_compose from .test_constants import service_ports @@ -167,7 +167,6 @@ def _load_data(cls, conn, table_names): f"COPY {table_name} FROM STDIN WITH (FORMAT csv, HEADER true)", data, ) - cur.execute(f"REFRESH MATERIALIZED VIEW {table_name}_view") conn.commit() cur.close() @@ -465,16 +464,15 @@ def test_task_count_after_two(self): self.assertEqual(2, len(res_json), msg) def _get_es(self): - return Elasticsearch( - host="localhost", - port=service_ports["es"], - connection_class=RequestsHttpConnection, - timeout=10, + endpoint = f"http://localhost:{service_ports['es']}" + es = Elasticsearch( + endpoint, + request_timeout=10, max_retries=10, retry_on_timeout=True, - http_auth=None, - wait_for_status="yellow", ) + es.cluster.health(wait_for_status="yellow") + return es @pytest.mark.order(after="test_task_count_after_two") def test_promote_images(self): diff --git a/ingestion_server/test/mock_schemas/audio_view.sql b/ingestion_server/test/mock_schemas/audio_view.sql deleted file mode 100644 index fa5e991a903..00000000000 --- a/ingestion_server/test/mock_schemas/audio_view.sql +++ /dev/null @@ -1,192 +0,0 @@ --- --- PostgreSQL database dump --- - --- Dumped from database version 13.2 --- Dumped by pg_dump version 13.3 (Debian 13.3-1.pgdg100+1) - -SET statement_timeout = 0; -SET lock_timeout = 0; -SET idle_in_transaction_session_timeout = 0; -SET client_encoding = 'UTF8'; -SET standard_conforming_strings = on; -SET check_function_bodies = false; -SET xmloption = content; -SET client_min_messages = warning; -SET row_security = off; - -SET default_tablespace = ''; - -SET default_table_access_method = heap; - --- --- Name: audio_view; Type: TABLE; Schema: public; Owner: deploy --- - -CREATE TABLE public.audio_view ( - id integer NOT NULL, - created_on timestamp with 
time zone NOT NULL, - updated_on timestamp with time zone NOT NULL, - identifier uuid NOT NULL, - foreign_identifier character varying(1000), - title character varying(2000), - foreign_landing_url character varying(1000), - creator character varying(2000), - creator_url character varying(2000), - url character varying(1000), - filesize integer, - watermarked boolean, - license character varying(50) NOT NULL, - license_version character varying(25), - provider character varying(80), - source character varying(80), - last_synced_with_source timestamp with time zone, - removed_from_source boolean NOT NULL, - view_count integer, - tags jsonb, - meta_data jsonb, - audio_set_position integer, - genres character varying(80)[], - category character varying(80), - duration integer, - bit_rate integer, - sample_rate integer, - alt_files jsonb, - thumbnail character varying(1000), - filetype character varying(80), - audio_set_foreign_identifier character varying(1000), - standardized_popularity double precision, - ingestion_type character varying(1000), - audio_set jsonb -); - - -ALTER TABLE public.audio_view OWNER TO deploy; - --- --- Name: audio_id_seq; Type: SEQUENCE; Schema: public; Owner: deploy --- - -CREATE SEQUENCE public.audio_id_seq - AS integer - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE public.audio_id_seq OWNER TO deploy; - --- --- Name: audio_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: deploy --- - -ALTER SEQUENCE public.audio_id_seq OWNED BY public.audio_view.id; - - --- --- Name: audio_view id; Type: DEFAULT; Schema: public; Owner: deploy --- - -ALTER TABLE ONLY public.audio_view ALTER COLUMN id SET DEFAULT nextval('public.audio_id_seq'::regclass); - - --- --- Name: audio_view audio_identifier_key; Type: CONSTRAINT; Schema: public; Owner: deploy --- - -ALTER TABLE ONLY public.audio_view - ADD CONSTRAINT audio_identifier_key UNIQUE (identifier); - - --- --- Name: audio_view audio_pkey; Type: CONSTRAINT; 
Schema: public; Owner: deploy --- - -ALTER TABLE ONLY public.audio_view - ADD CONSTRAINT audio_pkey PRIMARY KEY (id); - - --- --- Name: audio_view audio_url_key; Type: CONSTRAINT; Schema: public; Owner: deploy --- - -ALTER TABLE ONLY public.audio_view - ADD CONSTRAINT audio_url_key UNIQUE (url); - - --- --- Name: audio_view unique_provider_audio; Type: CONSTRAINT; Schema: public; Owner: deploy --- - -ALTER TABLE ONLY public.audio_view - ADD CONSTRAINT unique_provider_audio UNIQUE (foreign_identifier, provider); - - --- --- Name: audio_category_ceb7d386; Type: INDEX; Schema: public; Owner: deploy --- - -CREATE INDEX audio_category_ceb7d386 ON public.audio_view USING btree (category); - - --- --- Name: audio_category_ceb7d386_like; Type: INDEX; Schema: public; Owner: deploy --- - -CREATE INDEX audio_category_ceb7d386_like ON public.audio_view USING btree (category varchar_pattern_ops); - - --- --- Name: audio_foreign_identifier_617f66ad; Type: INDEX; Schema: public; Owner: deploy --- - -CREATE INDEX audio_foreign_identifier_617f66ad ON public.audio_view USING btree (foreign_identifier); - - --- --- Name: audio_foreign_identifier_617f66ad_like; Type: INDEX; Schema: public; Owner: deploy --- - -CREATE INDEX audio_foreign_identifier_617f66ad_like ON public.audio_view USING btree (foreign_identifier varchar_pattern_ops); - - --- --- Name: audio_genres_e34cc474; Type: INDEX; Schema: public; Owner: deploy --- - -CREATE INDEX audio_genres_e34cc474 ON public.audio_view USING btree (genres); - - --- --- Name: audio_last_synced_with_source_94c4a383; Type: INDEX; Schema: public; Owner: deploy --- - -CREATE INDEX audio_last_synced_with_source_94c4a383 ON public.audio_view USING btree (last_synced_with_source); - - --- --- Name: audio_provider_8fe1eb54; Type: INDEX; Schema: public; Owner: deploy --- - -CREATE INDEX audio_provider_8fe1eb54 ON public.audio_view USING btree (provider); - - --- --- Name: audio_provider_8fe1eb54_like; Type: INDEX; Schema: public; Owner: deploy --- - 
-CREATE INDEX audio_provider_8fe1eb54_like ON public.audio_view USING btree (provider varchar_pattern_ops); - - --- --- Name: audio_source_e9ccc813; Type: INDEX; Schema: public; Owner: deploy --- - -CREATE INDEX audio_source_e9ccc813 ON public.audio_view USING btree (source); - - --- --- PostgreSQL database dump complete --- diff --git a/ingestion_server/test/mock_schemas/image_view.sql b/ingestion_server/test/mock_schemas/image_view.sql deleted file mode 100644 index 6d94493b81a..00000000000 --- a/ingestion_server/test/mock_schemas/image_view.sql +++ /dev/null @@ -1,178 +0,0 @@ --- --- PostgreSQL database dump --- - --- Dumped from database version 13.2 --- Dumped by pg_dump version 13.3 (Debian 13.3-1.pgdg100+1) - -SET statement_timeout = 0; -SET lock_timeout = 0; -SET idle_in_transaction_session_timeout = 0; -SET client_encoding = 'UTF8'; -SET standard_conforming_strings = on; -SET check_function_bodies = false; -SET xmloption = content; -SET client_min_messages = warning; -SET row_security = off; - -SET default_tablespace = ''; - -SET default_table_access_method = heap; - --- --- Name: image_view; Type: TABLE; Schema: public; Owner: deploy --- - -CREATE TABLE public.image_view ( - id integer NOT NULL, - created_on timestamp with time zone NOT NULL, - updated_on timestamp with time zone NOT NULL, - identifier uuid NOT NULL, - provider character varying(80), - source character varying(80), - foreign_identifier character varying(1000), - foreign_landing_url character varying(1000), - url character varying(1000), - thumbnail character varying(1000), - width integer, - height integer, - filesize integer, - license character varying(50) NOT NULL, - license_version character varying(25), - creator character varying(2000), - creator_url character varying(2000), - title character varying(2000), - last_synced_with_source timestamp with time zone, - removed_from_source boolean NOT NULL, - meta_data jsonb, - view_count integer DEFAULT 0, - tags jsonb, - watermarked 
boolean, - filetype character varying(80), - standardized_popularity double precision, - ingestion_type character varying(1000) -); - - -ALTER TABLE public.image_view OWNER TO deploy; - --- --- Name: image_id_seq; Type: SEQUENCE; Schema: public; Owner: deploy --- - -CREATE SEQUENCE public.image_id_seq - AS integer - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE public.image_id_seq OWNER TO deploy; - --- --- Name: image_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: deploy --- - -ALTER SEQUENCE public.image_id_seq OWNED BY public.image_view.id; - - --- --- Name: image_view id; Type: DEFAULT; Schema: public; Owner: deploy --- - -ALTER TABLE ONLY public.image_view ALTER COLUMN id SET DEFAULT nextval('public.image_id_seq'::regclass); - - --- --- Name: image_view image_identifier_key; Type: CONSTRAINT; Schema: public; Owner: deploy --- - -ALTER TABLE ONLY public.image_view - ADD CONSTRAINT image_identifier_key UNIQUE (identifier); - - --- --- Name: image_view image_pkey; Type: CONSTRAINT; Schema: public; Owner: deploy --- - -ALTER TABLE ONLY public.image_view - ADD CONSTRAINT image_pkey PRIMARY KEY (id); - - --- --- Name: image_view image_url_key; Type: CONSTRAINT; Schema: public; Owner: deploy --- - -ALTER TABLE ONLY public.image_view - ADD CONSTRAINT image_url_key UNIQUE (url); - - --- --- Name: image_view unique_provider_image; Type: CONSTRAINT; Schema: public; Owner: deploy --- - -ALTER TABLE ONLY public.image_view - ADD CONSTRAINT unique_provider_image UNIQUE (foreign_identifier, provider); - - --- --- Name: image_foreign_identifier_4c72d3ee; Type: INDEX; Schema: public; Owner: deploy --- - -CREATE INDEX image_foreign_identifier_4c72d3ee ON public.image_view USING btree (foreign_identifier); - - --- --- Name: image_foreign_identifier_4c72d3ee_like; Type: INDEX; Schema: public; Owner: deploy --- - -CREATE INDEX image_foreign_identifier_4c72d3ee_like ON public.image_view USING btree (foreign_identifier 
varchar_pattern_ops); - - --- --- Name: image_last_synced_with_source_187adf09; Type: INDEX; Schema: public; Owner: deploy --- - -CREATE INDEX image_last_synced_with_source_187adf09 ON public.image_view USING btree (last_synced_with_source); - - --- --- Name: image_provider_7d11f847; Type: INDEX; Schema: public; Owner: deploy --- - -CREATE INDEX image_provider_7d11f847 ON public.image_view USING btree (provider); - - --- --- Name: image_provider_7d11f847_like; Type: INDEX; Schema: public; Owner: deploy --- - -CREATE INDEX image_provider_7d11f847_like ON public.image_view USING btree (provider varchar_pattern_ops); - - --- --- Name: image_source_d5a89e97; Type: INDEX; Schema: public; Owner: deploy --- - -CREATE INDEX image_source_d5a89e97 ON public.image_view USING btree (source); - - --- --- Name: image_source_d5a89e97_like; Type: INDEX; Schema: public; Owner: deploy --- - -CREATE INDEX image_source_d5a89e97_like ON public.image_view USING btree (source varchar_pattern_ops); - - --- --- Name: image_url_c6aabda2_like; Type: INDEX; Schema: public; Owner: deploy --- - -CREATE INDEX image_url_c6aabda2_like ON public.image_view USING btree (url varchar_pattern_ops); - - --- --- PostgreSQL database dump complete --- diff --git a/load_sample_data.sh b/load_sample_data.sh index 344b3f33ab1..6dc53712f00 100755 --- a/load_sample_data.sh +++ b/load_sample_data.sh @@ -28,7 +28,6 @@ function load_sample_data { \copy $1 \ from './sample_data/sample_$1.csv' \ with (FORMAT csv, HEADER true); - REFRESH MATERIALIZED VIEW $1_view; EOF" } diff --git a/packages/eslint-plugin/src/configs/import.ts b/packages/eslint-plugin/src/configs/import.ts index 4bbcc90d0e1..bcab2723100 100644 --- a/packages/eslint-plugin/src/configs/import.ts +++ b/packages/eslint-plugin/src/configs/import.ts @@ -4,9 +4,14 @@ import type { TSESLint } from "@typescript-eslint/utils" * ESLint `import` plugin configuration. 
*/ export = { - extends: ["plugin:import/recommended"], + extends: ["plugin:import/recommended", "plugin:import/typescript"], plugins: ["import"], rules: { + // `namespace` and `default` are handled by TypeScript + // There's no need to rely on ESLint for this + // https://github.com/import-js/eslint-plugin-import/issues/2878 + "import/namespace": "off", + "import/default": "off", "import/newline-after-import": ["error"], "import/order": [ "error", @@ -65,6 +70,27 @@ export = { "import/extensions": ["error", "always", { js: "never", ts: "never" }], }, overrides: [ + { + files: ["frontend/**"], + settings: { + "import/resolver": { + typescript: { + project: "frontend/tsconfig.json", + extensions: [".js", ".ts", ".vue", ".png"], + }, + }, + }, + }, + { + files: ["packages/**"], + settings: { + "import/resolver": { + typescript: { + project: "packages/*/tsconfig.json", + }, + }, + }, + }, { files: ["frontend/.storybook/**"], rules: { diff --git a/packages/eslint-plugin/src/configs/index.ts b/packages/eslint-plugin/src/configs/index.ts index 5ae7da0abd7..13363b2e92a 100644 --- a/packages/eslint-plugin/src/configs/index.ts +++ b/packages/eslint-plugin/src/configs/index.ts @@ -12,20 +12,17 @@ export const project: TSESLint.Linter.Config = { node: true, }, parser: "vue-eslint-parser", - parserOptions: { - parser: "@typescript-eslint/parser", - }, extends: [ "eslint:recommended", - "plugin:@typescript-eslint/recommended", "plugin:eslint-comments/recommended", "plugin:jsonc/recommended-with-jsonc", require.resolve("./custom"), require.resolve("./vue"), require.resolve("./import"), + require.resolve("./typescript"), "prettier", ], - plugins: ["@typescript-eslint", "tsdoc", "unicorn"], + plugins: ["unicorn"], settings: { "vue-i18n": { localeDir: "./frontend/src/locales/*.{json}", @@ -36,40 +33,12 @@ export const project: TSESLint.Linter.Config = { semi: ["error", "never"], "no-console": "off", "unicorn/filename-case": ["error", { case: "kebabCase" }], - 
"@typescript-eslint/no-var-requires": ["off"], }, overrides: [ - { - files: ["*.ts"], - rules: { - "tsdoc/syntax": "error", - }, - }, { files: ["*.json", "*.json5", "*.jsonc"], parser: "jsonc-eslint-parser", }, - { - files: ["frontend/**"], - settings: { - "import/resolver": { - typescript: { - project: "frontend/tsconfig.json", - extensions: [".js", ".ts", ".vue", ".png"], - }, - }, - }, - }, - { - files: ["packages/**"], - settings: { - "import/resolver": { - typescript: { - project: "packages/*/tsconfig.json", - }, - }, - }, - }, { env: { jest: true }, files: ["packages/**/*/test", "frontend/test/unit/**"], @@ -128,7 +97,6 @@ export const project: TSESLint.Linter.Config = { "unicorn/filename-case": "off", }, }, - { files: ["frontend/src/components/**"], rules: { diff --git a/packages/eslint-plugin/src/configs/typescript.ts b/packages/eslint-plugin/src/configs/typescript.ts new file mode 100644 index 00000000000..85b6be75cce --- /dev/null +++ b/packages/eslint-plugin/src/configs/typescript.ts @@ -0,0 +1,24 @@ +import type { TSESLint } from "@typescript-eslint/utils" + +export = { + parserOptions: { + parser: "@typescript-eslint/parser", + }, + plugins: ["@typescript-eslint", "tsdoc"], + extends: ["plugin:@typescript-eslint/recommended"], + rules: { + "@typescript-eslint/no-var-requires": ["off"], + }, + overrides: [ + { + files: ["*.ts"], + rules: { + "tsdoc/syntax": "error", + // This rule is disabled above to avoid forcing ESM syntax on regular JS files + // that aren't ready for it yet. We do want to enforce this for TypeScript, + // however, so we re-enable it here. 
+ "@typescript-eslint/no-var-requires": ["error"], + }, + }, + ], +} satisfies TSESLint.Linter.Config diff --git a/packages/eslint-plugin/src/rules/no-unexplained-disabled-test.ts b/packages/eslint-plugin/src/rules/no-unexplained-disabled-test.ts index 91242eaca93..bb76f56d84a 100644 --- a/packages/eslint-plugin/src/rules/no-unexplained-disabled-test.ts +++ b/packages/eslint-plugin/src/rules/no-unexplained-disabled-test.ts @@ -25,9 +25,7 @@ export const noUnexplainedDisabledTest = OpenverseRule<[], MessageIds>({ create(context) { const sourceCode = context.getSourceCode() - const hasIssueCommentWithLink = ( - node: TSESTree.Node | TSESTree.Comment - ) => { + const hasIssueCommentWithLink = (node: TSESTree.Node) => { const commentsBeforeNode = sourceCode.getCommentsBefore(node) for (const comment of commentsBeforeNode) { if (/\bhttps:\/\/github\.com\/.*?\/issues\/\d+\b/.test(comment.value)) { @@ -38,14 +36,16 @@ export const noUnexplainedDisabledTest = OpenverseRule<[], MessageIds>({ return false } - const testSkipRegex = /test\.skip\s*\(/g - const testSkipEachRegex = /test\.skip\.each\s*\(/g - const testConcurrentSkipEachRegex = /test\.concurrent\.skip\.each\s*\(/g - const testTodoRegex = /test\.todo\s*\(/g - const itSkipRegex = /it\.skip\s*\(/g - const itEachSkipRegex = /it\.each\.skip\s*\(/g - const describeSkipRegex = /describe\.skip\s*\(/g - const describeEachSkipRegex = /describe\.each\.skip\s*\(/g + // No `g` flag on purpose: a global regex keeps `lastIndex` between `.test()` calls, + // so with the `^` anchor the call right after a match would never match. + const testSkipRegex = /^test\.skip\s*\(/ + const testSkipEachRegex = /^test\.skip\.each\s*\(/ + const testConcurrentSkipEachRegex = /^test\.concurrent\.skip\.each\s*\(/ + const testTodoRegex = /^test\.todo\s*\(/ + const itSkipRegex = /^it\.skip\s*\(/ + const itEachSkipRegex = /^it\.each\.skip\s*\(/ + const describeSkipRegex = /^describe\.skip\s*\(/ + const describeEachSkipRegex = /^describe\.each\.skip\s*\(/ return { CallExpression(node) { diff --git a/packages/eslint-plugin/test/rules/no-unexplained-disabled-test.spec.ts
b/packages/eslint-plugin/test/rules/no-unexplained-disabled-test.spec.ts index be0e44e6d19..9eb484d17ca 100644 --- a/packages/eslint-plugin/test/rules/no-unexplained-disabled-test.spec.ts +++ b/packages/eslint-plugin/test/rules/no-unexplained-disabled-test.spec.ts @@ -50,6 +50,40 @@ const invalidTestCases = [ `, errors: [{ messageId: "missingIssueComment" } as const], }, + { + name: "Nested blocks with multiple skipped tests shows multiple errors", + code: ` + describe("block", () => { + test("no skipped", () => {}) + + test.skip("first skipped", () => {}) + + test("also not skipped", () => {}) + + test.skip("second skipped", () => {}) + }) + `, + errors: [ + { messageId: "missingIssueComment" }, + { messageId: "missingIssueComment" }, + ] as const, + }, + { + name: "Skipped external block and skipped nested block have separate errors", + code: ` + test.skip("external skip", () => {}) + + describe("block", () => { + test("not skipped", () => {}) + + test.skip("nested skip", () => {}) + }) + `, + errors: [ + { messageId: "missingIssueComment" }, + { messageId: "missingIssueComment" }, + ] as const, + }, ] const validTestCases = [ @@ -114,6 +148,29 @@ const validTestCases = [ }) `, }, + { + name: "Nested skip valid with comment on skipped test", + code: ` + describe("group of tests", () => { + // https://github.com/WordPress/openverse/issues/2573 + test.skip("skipped", () => {}) + }) + `, + }, + { + name: "Nested and external blocks do not error", + code: ` + // https://github.com/WordPress/openverse/issues/2573 + test.skip("external skip", () => {}) + + describe("block", () => { + test("not skipped", () => {}) + + // https://github.com/WordPress/openverse/issues/2573 + test.skip("nested skip", () => {}) + }) + `, + }, ] // Run the tests diff --git a/tsconfig.base.json b/tsconfig.base.json index 3b70618c123..cc5c9d24ff1 100644 --- a/tsconfig.base.json +++ b/tsconfig.base.json @@ -10,12 +10,18 @@ "esModuleInterop": true, /* Strict Type-Checking Options */ "strict": 
true, - "noUnusedLocals": true, - "noUnusedParameters": true, "noImplicitReturns": true, "noFallthroughCasesInSwitch": true, "paths": { "@openverse/*": ["./packages/*/src"] - } + }, + + /** + * Disable these in favour of the more flexible ESLint rule. + * + * https://typescript-eslint.io/rules/no-unused-vars/#benefits-over-typescript + */ + "noUnusedLocals": false, + "noUnusedParameters": false } }