Skip to content

Commit

Permalink
Update bad_data.php
Browse files Browse the repository at this point in the history
  • Loading branch information
GlazerMann authored Dec 27, 2024
1 parent 8bc221a commit 1cbaf03
Showing 1 changed file with 18 additions and 6 deletions.
24 changes: 18 additions & 6 deletions constants/bad_data.php
Original file line number Diff line number Diff line change
Expand Up @@ -597,9 +597,11 @@
'iopscience.iop.org',
'iospress.com',
'iospress.nl',
'jamanetwork.com/journals',
'journal.chestnet.org',
'journals.ametsoc.org',
'journals.ku.edu',
'journals.openedition.org',
'journals.royalsociety.org',
'journals.uchicago.edu',
'journals.upress.ufl.edu',
Expand All @@ -612,6 +614,7 @@
'pnas.org',
'psyche.entclub.org',
'psycnet.apa.org',
'publications.aap.org',
'pubs.geoscienceworld.org',
'pubs.rsc.org',
'sagepub.com',
Expand Down Expand Up @@ -644,9 +647,12 @@
];

// Host names of journal archive sites.
// Keep this list in alphabetical order, with each host listed exactly once.
const JOURNAL_ARCHIVES_SITES = [
'digitalcommons.colby.edu',
'ecommons.luc.edu',
'hal.science',
'perspectivia.net',
'shs.cairn.info',
'zaguan.unizar.es',
];

const PROXY_HOSTS_TO_ALWAYS_DROP = [
Expand Down Expand Up @@ -1128,8 +1134,6 @@
'kijkcijferanalyse\.nl/', // hijacked
];
const NON_JOURNAL_WEBSITES = [
'.ietf.org/',
'.nsw.gov.au/',
'-news.co.uk/',
'.ajc.com/',
'.al.com/',
Expand Down Expand Up @@ -1161,18 +1165,22 @@
'.fec.gov/',
'.ft.com/',
'.google.com/',
'.ietf.org/',
'.ign.com/',
'.jd.com/',
'.mic.com/',
'.muzines.co.uk/',
'.newsbank.com/',
'.nist.gov/',
'.nj.com/',
'.nsw.gov.au/',
'.pbs.org/',
'.qq.com/',
'.rt.com/',
'.svg.com/',
'.time.com/',
'.tvp.info/',
'.up.com/',
'.vk.com/',
'.yy.com/',
'/ajc.com/',
Expand Down Expand Up @@ -1209,7 +1217,9 @@
'/qq.com/',
'/rt.com/',
'/time.com/',
'/up.com/',
'/vk.com/',
'/x.com/',
'/yy.com/',
'112.international/',
'112.ua/',
Expand Down Expand Up @@ -5815,6 +5825,7 @@
'itu.int/',
'itunes.apple.com/',
'iucn.org/',
'iucnredlist.org/',
'iue.tuwien.ac.at/',
'iup.edu/',
'iuscanada.com/',
Expand Down Expand Up @@ -6700,6 +6711,7 @@
'loreal.com/',
'los40.com/',
'losinrocks.com/',
'losreinosdelasindias.hypotheses.org/',
'lost52project.org/',
'lostcircuits.com/',
'lostgalleon.com/',
Expand Down Expand Up @@ -8793,6 +8805,7 @@
'presidentsusa.net/',
'presidiotheatre.org/',
'presonus.com/',
'press.armywarcollege.edu/',
'press.bmwgroup.com/',
'press.pl/',
'pressacademy.com/',
Expand Down Expand Up @@ -9100,6 +9113,7 @@
'readbookonline.org/',
'readingfc.co.uk/',
'readingmuseum.org.uk/',
'readingroom.law.gsu.edu/',
'readingschools.scot/',
'readingzone.com/',
'reaganlibrary.gov/',
Expand Down Expand Up @@ -9845,6 +9859,7 @@
'shrewsburytown.com/',
'shrinershospitalsforchildren.org/',
'shrm.org/',
'shs.cairn.info/',
'shtong.gov.cn/',
'shudo.net/',
'shueisha.co.jp/',
Expand Down Expand Up @@ -11295,7 +11310,6 @@
'twin2.co.jp/',
'twitch.tv/',
'twitter.com/',
'/x.com/',
'twreporter.org/',
'twtd.co.uk/',
'txexecutions.org/',
Expand Down Expand Up @@ -11453,8 +11467,6 @@
'unz.org/',
'uol.com.br/',
'up.ac.za/',
'/up.com/',
'.up.com/',
'upcomer.com/',
'upi.com/',
'upmc.fr/',
Expand Down

0 comments on commit 1cbaf03

Please sign in to comment.