From a7b9a3733ed540f8a1098bad4002144d7ed23eaf Mon Sep 17 00:00:00 2001 From: omrilotan Date: Sun, 17 Dec 2023 23:31:11 +0000 Subject: [PATCH] Major Change Interface: release as "next" --- .circleci/config.yml | 73 +++--- .editorconfig | 7 +- .eslintignore | 2 - .gitattributes | 4 +- .gitignore | 11 +- .gitmodules | 4 - .npmignore | 14 -- .npmrc | 1 - .nvmrc | 2 +- .prettierignore | 3 + .remarkrc | 5 - .rollup.js | 34 --- CHANGELOG.md | 355 ++++++++++++++++++---------- CODE_OF_CONDUCT.md | 1 - CONTRIBUTING.md | 12 +- LICENSE | 2 + README.md | 174 +++++++------- SECURITY.md | 9 +- babel.config.json | 10 - fixtures/browsers.yml | 2 +- fixtures/crawlers.yml | 1 + fixtures/index.ts | 16 ++ index.d.ts | 58 ----- jest.config.js | 4 + package.json | 78 ++---- page/index.pug | 4 +- page/opensearch.xml | 4 +- page/script.js | 159 +++++++------ page/styles.css | 155 ++++++------ scripts/authors/index.js | 51 ++-- scripts/authors/sortBy/index.js | 14 -- scripts/build/pattern.js | 12 + scripts/build/procedure.sh | 26 ++ scripts/format/procedure.sh | 14 ++ scripts/gh-pages/procedure.sh | 27 +++ scripts/lib/client/index.js | 75 ------ scripts/lib/dedup/index.js | 6 - scripts/lib/exists/index.js | 16 +- scripts/lib/write/index.js | 15 -- {src => scripts}/package.json | 0 scripts/prepare/args/index.js | 6 +- scripts/prepare/build/index.js | 142 ++++------- scripts/prepare/externals/index.js | 165 ++++++------- scripts/prepare/index.js | 65 ++--- scripts/prepublish/procedure.sh | 18 ++ scripts/pushmodules/index.sh | 5 - scripts/sort/dedup/index.js | 6 - scripts/sort/downcase/index.js | 6 - scripts/sort/index.js | 40 ++-- scripts/sort/sort/index.js | 10 +- scripts/sort/sortJSON/index.js | 22 +- scripts/sort/sortYamlFile/index.js | 49 ++-- scripts/symlink/index.js | 33 --- src/amend/index.js | 36 --- src/browser.ts | 3 + src/index.js | 5 - src/index.ts | 56 +++++ src/isbot/index.js | 158 ------------- src/{list.json => patterns.json} | 12 +- tests/browser/karma.js | 23 -- tests/browser/rollup.js | 14 -- tests/browser/spec.js | 21 -- tests/cjs/package.json | 3 - tests/cjs/spec.js | 19 -- tests/efficiency/spec.js | 81 ------- tests/efficiency/test.ts | 95 ++++++++ tests/esm/package.json | 3 - tests/esm/spec.js | 19 -- tests/jest/.npmrc | 1 - tests/jest/babel.config.js | 6 - tests/jest/cjs/test.js | 9 - tests/jest/esm/test.js | 9 - tests/jest/package.json | 24 -- tests/jest/ts/test.ts | 10 - tests/{efficiency => }/package.json | 0 tests/spec/test.ts | 73 ++++++ tests/specs/package.json | 3 - tests/specs/spec.js | 158 ------------- tsconfig.json | 16 +- 79 files changed, 1201 insertions(+), 1683 deletions(-) delete mode 100644 .eslintignore delete mode 100644 .gitmodules delete mode 100644 .npmignore create mode 100644 .prettierignore delete mode 100644 .remarkrc delete mode 100644 .rollup.js delete mode 100644 babel.config.json create mode 100644 fixtures/index.ts delete mode 100644 index.d.ts create mode 100644 jest.config.js delete mode 100644 scripts/authors/sortBy/index.js create mode 100755 scripts/build/pattern.js create mode 100755 scripts/build/procedure.sh create mode 100755 scripts/format/procedure.sh create mode 100755 scripts/gh-pages/procedure.sh delete mode 100644 scripts/lib/client/index.js delete mode 100644 scripts/lib/dedup/index.js delete mode 100644 scripts/lib/write/index.js rename {src => scripts}/package.json (100%) create mode 100755 scripts/prepublish/procedure.sh delete mode 100755 scripts/pushmodules/index.sh delete mode 100644 scripts/sort/dedup/index.js delete mode 100644 scripts/sort/downcase/index.js delete mode 100755 scripts/symlink/index.js delete mode 100644 src/amend/index.js create mode 100644 src/browser.ts delete mode 100644 src/index.js create mode 100644 src/index.ts delete mode 100644 src/isbot/index.js rename src/{list.json => patterns.json} (93%) delete mode 100644 tests/browser/karma.js delete mode 100644 tests/browser/rollup.js delete mode 100644 tests/browser/spec.js delete mode 100644 tests/cjs/package.json delete mode 100644 tests/cjs/spec.js delete mode 100644 tests/efficiency/spec.js create mode 100644 tests/efficiency/test.ts delete mode 100644 tests/esm/package.json delete mode 100644 tests/esm/spec.js delete mode 100644 tests/jest/.npmrc delete mode 100644 tests/jest/babel.config.js delete mode 100644 tests/jest/cjs/test.js delete mode 100644 tests/jest/esm/test.js delete mode 100644 tests/jest/package.json delete mode 100644 tests/jest/ts/test.ts rename tests/{efficiency => }/package.json (100%) create mode 100644 tests/spec/test.ts delete mode 100644 tests/specs/package.json delete mode 100644 tests/specs/spec.js diff --git a/.circleci/config.yml b/.circleci/config.yml index 391728c6..69820318 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -27,11 +27,7 @@ workflows: matrix: parameters: image: - - cimg/node:12.22 - - cimg/node:14.18 - - cimg/node:16.15 - - cimg/node:18.4 - - cimg/node:19.6 + - cimg/node:18.19 - cimg/node:lts - cimg/node:current requires: @@ -44,31 +40,29 @@ workflows: filters: branches: only: - - main + - next - pages: context: globalenv requires: - publish - # scheduled: - # jobs: - # - periodic - # triggers: - # - schedule: - # cron: "0 9 * * 1" - # filters: - # branches: - # only: - # - main + scheduled: + jobs: + - periodic + triggers: + - schedule: + cron: "0 9 * * 1" + filters: + branches: + only: + - next jobs: build: docker: - - image: cimg/node:lts-browsers + - image: cimg/node:lts steps: - checkout - - run: git submodule update --init --recursive - run: node -v - run: npm i - - browsers - run: npm run build - persist_to_workspace: root: . @@ -77,18 +71,24 @@ jobs: tests: working_directory: ~/project docker: - - image: cimg/node:lts-browsers + - image: cimg/node:lts steps: - run: node -v - attach_workspace: at: . - - run: npm run lint - - run: npm run sort - - run: git diff --quiet || exit 1 - - run: npm run remark + - run: npm run format + - run: + name: Push autofixes + command: | + if [ "$(git diff --quiet && echo 0 || echo $?)" -gt 0 ]; then + git config --global user.email omrilotan@users.noreply.github.com --replace-all + git config --global user.name omrilotan --replace-all + git commit -am "😎 Autofix" + git remote set-url origin https://$CIRCLE_USERNAME:$GITHUB_API_TOKEN@github.com/$CIRCLE_PROJECT_USERNAME/$CIRCLE_PROJECT_REPONAME.git + git push -u origin HEAD:${CIRCLE_BRANCH} + exit 1 + fi - run: npm t - - run: npm run jest - - run: npm run ts compatibility: parameters: image: @@ -102,19 +102,11 @@ jobs: - run: node -v - attach_workspace: at: . - - run: npm run symlink - - run: npm run cjs - - when: - condition: - matches: - pattern: "cimg/node:1[2-8]-.*" - value: << parameters.image >> - steps: - - run: npm run esm + - run: npm t publish: working_directory: ~/project docker: - - image: cimg/node:lts-browsers + - image: cimg/node:lts steps: - checkout - run: @@ -132,7 +124,7 @@ jobs: - attach_workspace: at: . - browsers - - run: npm publish + - run: npm publish --tag next - run: name: Add git tag command: git tag -a "v$(cat package.json | jq ".version" -r)" -m "$(git show -s --format=%B | tr -d '\n')" @@ -151,19 +143,18 @@ jobs: command: cp -r .circleci docs/ - run: name: Post to gh-pages - command: curl ci-cd.net/v1/gh/pages | bash -s docs/. + command: echo ./scripts/gh-pages/procedure.sh | bash -s periodic: working_directory: ~/project docker: - image: cimg/node:lts steps: - checkout - - run: git submodule update --init --recursive - run: npm i - run: | - npm run spec || curl --request POST \ + npm t || curl --request POST \ --url https://api.github.com/repos/${CIRCLE_PROJECT_USERNAME}/${CIRCLE_PROJECT_REPONAME}/issues \ - --header 'authorization: Bearer ${GITHUB_TOKEN}' \ + --header 'authorization: Bearer ${ISBOT_ISSUE_TOKEN}' \ --header 'content-type: application/json' \ --data '{ "title": "Automated issue for failing periodic tests", diff --git a/.editorconfig b/.editorconfig index c7e356ea..41ac5166 100644 --- a/.editorconfig +++ b/.editorconfig @@ -1,4 +1,3 @@ -# EditorConfig is awesome: http://EditorConfig.org root = true [*] @@ -6,5 +5,9 @@ end_of_line = lf insert_final_newline = true trim_trailing_whitespace = true charset = utf-8 -indent_style = space +indent_style = tab tab_width = 2 + +[*.{yml,json,md}] +indent_size = 2 +indent_style = space diff --git a/.eslintignore b/.eslintignore deleted file mode 100644 index 9fa1e3ba..00000000 --- a/.eslintignore +++ /dev/null @@ -1,2 +0,0 @@ -dist.js -/index.* diff --git a/.gitattributes b/.gitattributes index f71ffcd7..8d3ab02b 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,5 +1,3 @@ * text eol=lf -.github/**/* linguist-vendored -scripts/* linguist-vendored -page/* linguist-vendored +fixtures/downloads/* linguist-vendored diff --git a/.gitignore b/.gitignore index 8ebebafe..1e391211 100644 --- a/.gitignore +++ b/.gitignore @@ -1,8 +1,9 @@ node_modules -/.cache -/docs + +# Auomatically built files +/src/pattern.ts +/index* +/*.d.ts /fixtures/index.json -dist.js /AUTHORS -/index.* -!/index.d.ts +/docs diff --git a/.gitmodules b/.gitmodules deleted file mode 100644 index 041f49bf..00000000 --- a/.gitmodules +++ /dev/null @@ -1,4 +0,0 @@ -[submodule "fixtures/downloads"] - path = fixtures/downloads - url = git@github.com:omrilotan/isbot.git - branch = downloads diff --git a/.npmignore b/.npmignore deleted file mode 100644 index 91c2ee13..00000000 --- a/.npmignore +++ /dev/null @@ -1,14 +0,0 @@ -.* -*.md -LICENSE - -docs/* -fixtures/* -jest/* -page/* -scripts/* -src/* -tests/* - -babel.config.json -tsconfig.json diff --git a/.npmrc b/.npmrc index adcdb9e9..e1a92050 100644 --- a/.npmrc +++ b/.npmrc @@ -1,3 +1,2 @@ package-lock=false access=public -user-agent=isbot diff --git a/.nvmrc b/.nvmrc index 3c032078..209e3ef4 100644 --- a/.nvmrc +++ b/.nvmrc @@ -1 +1 @@ -18 +20 diff --git a/.prettierignore b/.prettierignore new file mode 100644 index 00000000..0bac26e4 --- /dev/null +++ b/.prettierignore @@ -0,0 +1,3 @@ +.* +/index* +src/pattern.ts diff --git a/.remarkrc b/.remarkrc deleted file mode 100644 index a81c42ee..00000000 --- a/.remarkrc +++ /dev/null @@ -1,5 +0,0 @@ -{ - "plugins": [ - "remark-preset-lint-recommended" - ] -} diff --git a/.rollup.js b/.rollup.js deleted file mode 100644 index a1cd0e13..00000000 --- a/.rollup.js +++ /dev/null @@ -1,34 +0,0 @@ -const { join } = require('path') -const { babel } = require('@rollup/plugin-babel') -const { importAssertions } = require('acorn-import-assertions') -const json = require('@rollup/plugin-json') - -module.exports = [ - { - ext: 'iife.js', format: 'iife' - }, - { - ext: 'js', format: 'cjs' - }, - { - ext: 'mjs', format: 'es' - } -].map( - ({ ext, format }) => ({ - input: join(__dirname, 'src', 'index.js'), - output: { - file: join(__dirname, [ 'index', ext ].join('.')), - format, - exports: 'auto', - name: 'isbot', - strict: false, - sourcemap: true, - sourcemapFile: join(__dirname, [ 'index', ext, 'map' ].join('.')) - }, - acornInjectPlugins: [ importAssertions ], - plugins: [ - babel({ babelHelpers: 'bundled' }), - json() - ] - }) -) diff --git a/CHANGELOG.md b/CHANGELOG.md index 24749519..9d621ef9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,279 +1,383 @@ # Changelog +## [4.0.0](https://github.com/omrilotan/isbot/compare/v3.7.1...v4.0.0) + +### Breaking changes + +This change is meant to reduce the size of the package and improve performance by building the regular expression in build time instead of runtime. + +- Change interface + + - Remove default import. Use named import instead: `import { isbot } from "isbot";` + - Drop `isbot` attached functions from the interface. `isbot.` is no longer supported + +- Drop support for EOL node versions + +### New features + +```ts +import { } from "isbot"; +``` + +| import | Type | Description | +| ------------- | --------------------------------------------------- | ------------------------------------------------------------------------- | +| pattern | _{RegExp}_ | The regular expression used to identify bots | +| list | _{string[]}_ | List of all individual pattern parts | +| isbotMatch | _{(userAgent: string): string \| null}_ | The substring matched by the regular expression | +| isbotMatches | _{(userAgent: string): string[]}_ | All substrings matched by the regular expression | +| isbotPattern | _{(userAgent: string): string \| null}_ | The regular expression used to identify bot substring in the user agent | +| isbotPatterns | _{(userAgent: string): string[]}_ | All regular expressions used to identify bot substrings in the user agent | +| createIsbot | _{(pattern: RegExp): (userAgent: string): boolean}_ | Create a custom isbot function | + ## [3.7.1](https://github.com/omrilotan/isbot/compare/v3.7.0...v3.7.1) -- Replace "ghost" with "inspect" to avoid false positives + +- Replace "ghost" with "inspect" to avoid false positives ## [3.7.0](https://github.com/omrilotan/isbot/compare/v3.6.13...v3.7.0) -- Expose iife and support JSDeliver CDN + +- Expose iife and support JSDeliver CDN ## [3.6.13](https://github.com/omrilotan/isbot/compare/v3.6.12...v3.6.13) -- Treat Splash browser as bot [@viraptor](https://github.com/viraptor) + +- Treat Splash browser as bot [@viraptor](https://github.com/viraptor) ## [3.6.12](https://github.com/omrilotan/isbot/compare/v3.6.11...v3.6.12) -- mem: Make a group non capturing + +- mem: Make a group non capturing ## [3.6.11](https://github.com/omrilotan/isbot/compare/v3.6.10...v3.6.11) -- Fix "news" pattern to allow [Google News Android App](https://play.google.com/store/apps/details?id=com.google.android.apps.magazines&hl=en&gl=US&pli=1) [@pulzarraider](https://github.com/pulzarraider) -- Add YaDirectFetcher, amaya + +- Fix "news" pattern to allow [Google News Android App](https://play.google.com/store/apps/details?id=com.google.android.apps.magazines&hl=en&gl=US&pli=1) [@pulzarraider](https://github.com/pulzarraider) +- Add YaDirectFetcher, amaya ## [3.6.10](https://github.com/omrilotan/isbot/compare/v3.6.9...v3.6.10) -- Adjust the "client" substring pattern + +- Adjust the "client" substring pattern ## [3.6.9](https://github.com/omrilotan/isbot/compare/v3.6.8...v3.6.9) -- Adjust GOGGalaxy pattern -- Update built files + +- Adjust GOGGalaxy pattern +- Update built files ## [3.6.8](https://github.com/omrilotan/isbot/compare/v3.6.7...v3.6.8) -- Add Speedcurve: [Maximilian Haupt](https://github.com/0x7f) -- Exclude specific "client" substrings + +- Add Speedcurve: [Maximilian Haupt](https://github.com/0x7f) +- Exclude specific "client" substrings ## [3.6.7](https://github.com/omrilotan/isbot/compare/v3.6.6...v3.6.7) -- Add PhantomJS substring + +- Add PhantomJS substring ## [3.6.6](https://github.com/omrilotan/isbot/compare/v3.6.5...v3.6.6) -- Add CryptoAPI to known bots list -- Add Pageburst + +- Add CryptoAPI to known bots list +- Add Pageburst ## [3.6.6](https://github.com/omrilotan/isbot/compare/v3.6.5...v3.6.6) -- Add CryptoAPI to known bots list + +- Add CryptoAPI to known bots list ## [3.6.5](https://github.com/omrilotan/isbot/compare/v3.6.4...v3.6.5) -- Improvement: List reduced by >50 patterns for a better one-word pattern + +- Improvement: List reduced by >50 patterns for a better one-word pattern ## [3.6.4](https://github.com/omrilotan/isbot/compare/v3.6.3...v3.6.4) -- Add [Fuzz Faster U Fool](https://github.com/ffuf/ffuf) + +- Add [Fuzz Faster U Fool](https://github.com/ffuf/ffuf) ## [3.6.3](https://github.com/omrilotan/isbot/compare/v3.6.2...v3.6.3) -- Adjust single word pattern: Add brackets + +- Adjust single word pattern: Add brackets ## [3.6.2](https://github.com/omrilotan/isbot/compare/v3.6.1...v3.6.2) -- Recognise Uptime-Kuma/1.18.0 -- Reintroduce Yandex Search app exclusion + +- Recognise Uptime-Kuma/1.18.0 +- Reintroduce Yandex Search app exclusion ## [3.6.1](https://github.com/omrilotan/isbot/compare/v3.6.0...v3.6.1) -- Edit list and exception patterns (more bots, simpler pattern) + +- Edit list and exception patterns (more bots, simpler pattern) ## [3.6.0](https://github.com/omrilotan/isbot/compare/v3.5.4...v3.6.0) -- Expose a copy of the regular expression pattern via isbot.pattern getter + +- Expose a copy of the regular expression pattern via isbot.pattern getter ## [3.5.4](https://github.com/omrilotan/isbot/compare/v3.5.3...v3.5.4) -- Add strings starting with the word "nginx" + +- Add strings starting with the word "nginx" ## [3.5.3](https://github.com/omrilotan/isbot/compare/v3.5.2...v3.5.3) -- Fix for "Google Pixel" combination -- Add strings starting with "custom" + +- Fix for "Google Pixel" combination +- Add strings starting with "custom" ## [3.5.2](https://github.com/omrilotan/isbot/compare/v3.5.1...v3.5.2) -- Build supports more interpolation (transform class etc.) + +- Build supports more interpolation (transform class etc.) ## [3.5.1](https://github.com/omrilotan/isbot/compare/v3.5.0...v3.5.1) -- Add SERP (Search Engine Results Pages) Reputation Management tools + +- Add SERP (Search Engine Results Pages) Reputation Management tools ## [3.5.0](https://github.com/omrilotan/isbot/compare/v3.4.8...v3.5.0) -- Specify browser and node entries for require and import (resolves issue with jest 28) + +- Specify browser and node entries for require and import (resolves issue with jest 28) ## [3.4.8](https://github.com/omrilotan/isbot/compare/v3.4.7...v3.4.8) -- Replace single space pattern with literal white space, which is more efficient -- Add a more generic identifier to simplified user agent names + +- Replace single space pattern with literal white space, which is more efficient +- Add a more generic identifier to simplified user agent names ## [3.4.7](https://github.com/omrilotan/isbot/compare/v3.4.6...v3.4.7) -- Add Zoom Webhook + +- Add Zoom Webhook ## [3.4.6](https://github.com/omrilotan/isbot/compare/v3.4.5...v3.4.6) -- Add nodejs native agent (undici) -- Add random long string + +- Add nodejs native agent (undici) +- Add random long string ## [3.4.5](https://github.com/omrilotan/isbot/compare/v3.4.4...v3.4.5) -- Add CF-UC web crawler -- Add TagInspector -- Add Request-Pomise + +- Add CF-UC web crawler +- Add TagInspector +- Add Request-Pomise ## [3.4.4](https://github.com/omrilotan/isbot/compare/v3.4.3...v3.4.4) -- Add [Morningscore](https://morningscore.io/) + +- Add [Morningscore](https://morningscore.io/) ## [3.4.3](https://github.com/omrilotan/isbot/compare/v3.4.2...v3.4.3) -- Add Postman + +- Add Postman ## [3.4.2](https://github.com/omrilotan/isbot/compare/v3.4.1...v3.4.2) -- Add generic term: "proxy" -- Optimise "email" rule -- Add Rexx + +- Add generic term: "proxy" +- Optimise "email" rule +- Add Rexx ## [3.4.1](https://github.com/omrilotan/isbot/compare/v3.4.0...v3.4.1) -- Add recognised bots user agent patterns + +- Add recognised bots user agent patterns ## [3.4.0](https://github.com/omrilotan/isbot/compare/v3.3.4...v3.4.0) -- Add "matches" and "clear" to interface -- Recognise axios/ user agent as bot + +- Add "matches" and "clear" to interface +- Recognise axios/ user agent as bot ## [3.3.4](https://github.com/omrilotan/isbot/compare/v3.3.3...v3.3.4) -- Add "package.json" to exports ([#165](https://github.com/omrilotan/isbot/pull/165)) by [javivelasco](https://github.com/omrilotan/isbot/commits?author=javivelasco) + +- Add "package.json" to exports ([#165](https://github.com/omrilotan/isbot/pull/165)) by [javivelasco](https://github.com/omrilotan/isbot/commits?author=javivelasco) ## [3.3.3](https://github.com/omrilotan/isbot/compare/v3.3.2...v3.3.3) -- Add generic patterns (name/version) reduces pattern list size by >20% -- Internal formatting + +- Add generic patterns (name/version) reduces pattern list size by >20% +- Internal formatting ## [3.3.2](https://github.com/omrilotan/isbot/compare/v3.3.1...v3.3.2) -- Remove const keyword from build (Fix) + +- Remove const keyword from build (Fix) ## [3.3.1](https://github.com/omrilotan/isbot/compare/v3.3.0...v3.3.1) -- Fix in type definition + +- Fix in type definition ## [3.3.0](https://github.com/omrilotan/isbot/compare/v3.2.4...v3.3.0) -- Add "spawn" interface + +- Add "spawn" interface ## [3.2.4](https://github.com/omrilotan/isbot/compare/v3.2.3...v3.2.4) -- Add some RSS readers detection + +- Add some RSS readers detection ## [3.2.3](https://github.com/omrilotan/isbot/compare/v3.2.2...v3.2.3) -- Refine amiga user agent detection + +- Refine amiga user agent detection ## [3.2.2](https://github.com/omrilotan/isbot/compare/v3.2.1...v3.2.2) -- One mode duckduckgo pattern + +- One mode duckduckgo pattern ## [3.2.1](https://github.com/omrilotan/isbot/compare/v3.2.0...v3.2.1) -- Add bitdiscovery, Invision bot, ddg_android (duckduckgo), Braze, [gobuster](https://github.com/OJ/gobuster) + +- Add bitdiscovery, Invision bot, ddg_android (duckduckgo), Braze, [gobuster](https://github.com/OJ/gobuster) ## [3.2.0](https://github.com/omrilotan/isbot/compare/v3.1.0...v3.2.0) + ### New features -- Typescript definition (isbot) supports any. Where a non-string argument is cast to a string before execution + +- Typescript definition (isbot) supports any. Where a non-string argument is cast to a string before execution ## [3.1.0](https://github.com/omrilotan/isbot/compare/v3.0.27...v3.1.0) + ### New features -- Native support for ESM and CommonJS -- Start maintaining a security policy + +- Native support for ESM and CommonJS +- Start maintaining a security policy ### List updates -- Remove WAPCHOI from bot list -- Recognise Google/google user agent for Android webview + +- Remove WAPCHOI from bot list +- Recognise Google/google user agent for Android webview ## [3.0.27](https://github.com/omrilotan/isbot/compare/v3.0.26...v3.0.27) -- Add a few known crawlers + +- Add a few known crawlers ## [3.0.26](https://github.com/omrilotan/isbot/compare/v3.0.25...v3.0.26) -- Open source projects with indication to github.com + +- Open source projects with indication to github.com ## [3.0.25](https://github.com/omrilotan/isbot/compare/v3.0.24...v3.0.25) -- Address webview "Channel/googleplay", "GoogleApp/" -- Add 4 more bot patterns -- Stop treating Splash browser as bot + +- Address webview "Channel/googleplay", "GoogleApp/" +- Add 4 more bot patterns +- Stop treating Splash browser as bot ## [3.0.24](https://github.com/omrilotan/isbot/compare/v3.0.23...v3.0.24) -- Add Prometheus new user agent (prometheus) -- Add RestSharp .NET HTTP client -- Add M2E Pro Cron Service -- Add Deluge -- Deprecate asafaweb.com (EOL) + +- Add Prometheus new user agent (prometheus) +- Add RestSharp .NET HTTP client +- Add M2E Pro Cron Service +- Add Deluge +- Deprecate asafaweb.com (EOL) ## [3.0.23](https://github.com/omrilotan/isbot/compare/v3.0.22...v3.0.23) -- Recognise Mozilla MozacFetch as natural non bot browser + +- Recognise Mozilla MozacFetch as natural non bot browser ## [3.0.22](https://github.com/omrilotan/isbot/compare/v3.0.21...v3.0.22) -- Add generic term: "manager" + +- Add generic term: "manager" ## [3.0.21](https://github.com/omrilotan/isbot/compare/v3.0.20...v3.0.21) -- Reduce pattern complexity + +- Reduce pattern complexity ## [3.0.20](https://github.com/omrilotan/isbot/compare/v3.0.19...v3.0.20) -- Add Anonymous and bit.ly + +- Add Anonymous and bit.ly ## [3.0.19](https://github.com/omrilotan/isbot/compare/v3.0.18...v3.0.19) -- Fix: It's not needed to download fixtures at postinstall + +- Fix: It's not needed to download fixtures at postinstall ## [3.0.18](https://github.com/omrilotan/isbot/compare/v3.0.17...v3.0.18) -- Add [AngleSharp parser](https://github.com/AngleSharp/AngleSharp) -- Some Maintenance + +- Add [AngleSharp parser](https://github.com/AngleSharp/AngleSharp) +- Some Maintenance ## [3.0.17](https://github.com/omrilotan/isbot/compare/v3.0.16...v3.0.17) -- Add Neustar WPM -- Internal change accommodates TypeScript compiler + +- Add Neustar WPM +- Internal change accommodates TypeScript compiler ## [3.0.16](https://github.com/omrilotan/isbot/compare/v3.0.15...v3.0.16) -- Add pagespeed (Serf) -- Add SmallProxy -- Add CaptiveNetworkSupport + +- Add pagespeed (Serf) +- Add SmallProxy +- Add CaptiveNetworkSupport ## [3.0.15](https://github.com/omrilotan/isbot/compare/v3.0.14...v3.0.15) -- Recognise a bunch of more bots -- Optimise some of the list so we still have the same length + +- Recognise a bunch of more bots +- Optimise some of the list so we still have the same length ## [3.0.14](https://github.com/omrilotan/isbot/compare/v3.0.13...v3.0.14) -- Add Gozilla -- Add PerimeterX Integration Services + +- Add Gozilla +- Add PerimeterX Integration Services ## [3.0.13](https://github.com/omrilotan/isbot/compare/v3.0.12...v3.0.13) -- Add Kubernetes probe bot (ping and health-check) @simonecorsi + +- Add Kubernetes probe bot (ping and health-check) @simonecorsi ## [3.0.12](https://github.com/omrilotan/isbot/compare/v3.0.11...v3.0.12) -- Add [Discourse onebox](https://github.com/discourse/onebox) is used for link preview generation -- [Alexa Voice Service](https://github.com/alexa/avs-device-sdk) -- Reduce complexity by funding more common patterns + +- Add [Discourse onebox](https://github.com/discourse/onebox) is used for link preview generation +- [Alexa Voice Service](https://github.com/alexa/avs-device-sdk) +- Reduce complexity by funding more common patterns ## [3.0.11](https://github.com/omrilotan/isbot/compare/v3.0.10...v3.0.11) -- Add 5538 known crawler user agent strings from [myip.ms](https://www.myip.ms) -- Reduce complexity by 79 by introducing "https?:" pattern + +- Add 5538 known crawler user agent strings from [myip.ms](https://www.myip.ms) +- Reduce complexity by 79 by introducing "https?:" pattern ## [3.0.10](https://github.com/omrilotan/isbot/compare/v3.0.9...v3.0.10) -- Add [Sistrix (SEO)](https://www.sistrix.com/) -- JavaOS (Discontinued in 2003) + +- Add [Sistrix (SEO)](https://www.sistrix.com/) +- JavaOS (Discontinued in 2003) ## [3.0.9](https://github.com/omrilotan/isbot/compare/v3.0.8...v3.0.9) -- Add Shared Web Credentials tool -- Add Java runtime request -- Add [2GDPR](https://2gdpr.com/) -- Add GetRight -- Add [Pompos](http://pompos.iliad.fr) + +- Add Shared Web Credentials tool +- Add Java runtime request +- Add [2GDPR](https://2gdpr.com/) +- Add GetRight +- Add [Pompos](http://pompos.iliad.fr) ## [3.0.8](https://github.com/omrilotan/isbot/compare/v3.0.7...v3.0.8) -- Add [SignalR client](https://github.com/dotnet/aspnetcore/search?q=signalr&unscoped_q=signalr) -- Add FirePHP -- Reduce complexity for UAs containing "amiga" (by 3) -- Reduce complexity for UAs containing "download" (by 2) + +- Add [SignalR client](https://github.com/dotnet/aspnetcore/search?q=signalr&unscoped_q=signalr) +- Add FirePHP +- Reduce complexity for UAs containing "amiga" (by 3) +- Reduce complexity for UAs containing "download" (by 2) ## [3.0.7](https://github.com/omrilotan/isbot/compare/v3.0.6...v3.0.7) -- Reduce pattern complexity by 14 + +- Reduce pattern complexity by 14 ## [3.0.6](https://github.com/omrilotan/isbot/compare/v3.0.5...v3.0.6) -- Respond to crawler user agents added to user-agents.net/bots -- ApplicationHealthService: Ping Service + +- Respond to crawler user agents added to user-agents.net/bots +- ApplicationHealthService: Ping Service ## [3.0.5](https://github.com/omrilotan/isbot/compare/v3.0.4...v3.0.5) -- Respond to crawler user agents added to user-agents.net/bots -- Add [Rigor synthetic monitoring](https://rigor.com/) + +- Respond to crawler user agents added to user-agents.net/bots +- Add [Rigor synthetic monitoring](https://rigor.com/) ## [3.0.4](https://github.com/omrilotan/isbot/compare/v3.0.3...v3.0.4) -- [`Hexometer`](https://hexometer.com/) -- Respond to crawler user agents added to user-agents.net/bots -- Add an "ignoreList" to exclude user agents from user-agents.net + +- [`Hexometer`](https://hexometer.com/) +- Respond to crawler user agents added to user-agents.net/bots +- Add an "ignoreList" to exclude user agents from user-agents.net ## [3.0.3](https://github.com/omrilotan/isbot/compare/v3.0.2...v3.0.3) ### Add bots -- Respond to crawler user agents added to user-agents.net/bots + +- Respond to crawler user agents added to user-agents.net/bots ## [3.0.2](https://github.com/omrilotan/isbot/compare/v3.0.1...v3.0.2) ### Optimise pattern list + Combine all google products: Google browsers' user agent do not contain the word "Google". ### Add bots -- M4A1-WAPCHOI/2.0 (Java; U; MIDP-2.0; vi; NokiaC5-00.2) WAPCHOI/1.0.0 UCPro/9.4.1.377 U2/1.0.0 Mobile UNTRUSTED/1.0 3gpp-gba -- Mozilla/5.0 (compatible; Domains Project/1.0.3; +https://github.com/tb0hdan/domains) + +- M4A1-WAPCHOI/2.0 (Java; U; MIDP-2.0; vi; NokiaC5-00.2) WAPCHOI/1.0.0 UCPro/9.4.1.377 U2/1.0.0 Mobile UNTRUSTED/1.0 3gpp-gba +- Mozilla/5.0 (compatible; Domains Project/1.0.3; +https://github.com/tb0hdan/domains) Overall reduces list by 25 rules (from 345 rules to 320) ## [3.0.1](https://github.com/omrilotan/isbot/compare/v3.0.0...v3.0.1) ### Crawlers list update + Add patterns for: -- Google WebLight Proxy -- HighWinds Content Delivery System -- [Hydra by addthis](https://github.com/addthis/hydra) -- [RebelMouse](https://www.rebelmouse.com/rebelmouse-public-api) -- Scanners: Jorgee Vulnerability, ClamAV Website, Burp Collaborator -- Monitoring services: Xymon, AlertSite, Hobbit, updown.io, Monit, Dotcom + +- Google WebLight Proxy +- HighWinds Content Delivery System +- [Hydra by addthis](https://github.com/addthis/hydra) +- [RebelMouse](https://www.rebelmouse.com/rebelmouse-public-api) +- Scanners: Jorgee Vulnerability, ClamAV Website, Burp Collaborator +- Monitoring services: Xymon, AlertSite, Hobbit, updown.io, Monit, Dotcom ### Testing + Add some legit browser user-agent strings Fix periodic tests environment Add [a tester page](https://isbot.js.org) to check user agents easily @@ -283,12 +387,15 @@ Add [a tester page](https://isbot.js.org) to check user agents easily The API and code **has not changed** ### Breaking changes -- Remove testing on node 6 and 8 -- Some crawlers list updates can potentially change identification + +- Remove testing on node 6 and 8 +- Some crawlers list updates can potentially change identification ### Non breaking changes -- Improve efficiency of rule by optimising some parts and **removing** others + +- Improve efficiency of rule by optimising some parts and **removing** others ### Testing -- Automatically download crawlers lists for verification -- Add tests to improve efficiency + +- Automatically download crawlers lists for verification +- Add tests to improve efficiency diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md index 3eecc623..e45c1afa 100644 --- a/CODE_OF_CONDUCT.md +++ b/CODE_OF_CONDUCT.md @@ -1,4 +1,3 @@ - # Code of Conduct Please be nice, considerate, and respectful. I welcome any kind of input and will try to respond quickly. diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 4be62fe2..2cfe2041 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -10,16 +10,18 @@ Please feel free to [open an issue](https://github.com/omrilotan/isbot/issues/ne ## Pattern verification tests -Prepare lists of user agent strings before testing +Prepare lists of user agent strings before testing. This should happen automatically the first time, but it's recommended to update the lists before running tests. + ```js npm run prepare -- -f ``` ### Lists -- Manual legit browsers list: [fixtures/browsers.yml](https://github.com/omrilotan/isbot/blob/main/fixtures/browsers.yml) -- Manual known crawlers list: [fixtures/crawlers.yml](https://github.com/omrilotan/isbot/blob/main/fixtures/crawlers.yml) -- Downloaded resources end up in [fixtures/downloads](https://github.com/omrilotan/isbot/blob/main/fixtures/downloads) folder -- [user-agents](https://www.npmjs.com/package/user-agents) package is used to add randomly generated browser user agents + +- Manual legit browsers list: [fixtures/browsers.yml](https://github.com/omrilotan/isbot/blob/main/fixtures/browsers.yml) +- Manual known crawlers list: [fixtures/crawlers.yml](https://github.com/omrilotan/isbot/blob/main/fixtures/crawlers.yml) +- Downloaded resources end up in [fixtures/downloads](https://github.com/omrilotan/isbot/blob/main/fixtures/downloads) folder +- [user-agents](https://www.npmjs.com/package/user-agents) package is used to add randomly generated browser user agents ### Misidentification diff --git a/LICENSE b/LICENSE index 68a49daa..ede9e4f3 100644 --- a/LICENSE +++ b/LICENSE @@ -1,3 +1,5 @@ +# Unlicense + This is free and unencumbered software released into the public domain. Anyone is free to copy, modify, publish, use, compile, sell, or diff --git a/README.md b/README.md index 4c80bb9d..99bc4c1b 100644 --- a/README.md +++ b/README.md @@ -4,25 +4,34 @@ [![](./page/isbot.svg)](https://isbot.js.org) -Detect bots/crawlers/spiders using the user agent string. +Recognise bots/crawlers/spiders using the user agent string. + +> ## Migrate to version 4 today +> +> `npm i isbot@4` or `npm i isbot@next` ## Usage -```js -import isbot from 'isbot' +```ts +import { isbot } from "isbot"; // Nodejs HTTP -isbot(request.getHeader('User-Agent')) +isbot(request.getHeader("User-Agent")); // ExpressJS -isbot(req.get('user-agent')) +isbot(req.get("user-agent")); // Browser -isbot(navigator.userAgent) +isbot(navigator.userAgent); // User Agent string -isbot('Mozilla/5.0 (iPhone; CPU iPhone OS 6_0 like Mac OS X) AppleWebKit/536.26 (KHTML, like Gecko) Version/6.0 Mobile/10A5376e Safari/8536.25 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)') // true -isbot('Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36') // false +isbot( + "Mozilla/5.0 (iPhone; CPU iPhone OS 6_0 like Mac OS X) AppleWebKit/536.26 (KHTML, like Gecko) Version/6.0 Mobile/10A5376e Safari/8536.25 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)", +); // true + +isbot( + "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36", +); // false ``` Using JSDeliver CDN you can import an iife script @@ -30,124 +39,111 @@ Using JSDeliver CDN you can import an iife script > See specific versions https://www.jsdelivr.com/package/npm/isbot or https://cdn.jsdelivr.net/npm/isbot ```html - -// isbot is global -isbot(navigator.userAgent) -``` - -## Additional functionality - -### Extend: Add user agent patterns -Add rules to user agent match RegExp: Array of strings - -```js -isbot('Mozilla/5.0 (X11) Firefox/111.0') // false -isbot.extend([ - 'istat', - 'x11' -]) -isbot('Mozilla/5.0 (X11) Firefox/111.0') // true -``` - -### Exclude: Remove matches of known crawlers -Remove rules to user agent match RegExp (see existing rules in `src/list.json` file) - -> This function requires konwnledge of the internal structure of the list - which may change at any time. It is recommended to use the `clear` function instead - -```js -isbot('Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4590.2 Safari/537.36 Chrome-Lighthouse') // true -isbot.exclude(['chrome-lighthouse']) // pattern is case insensitive -isbot('Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4590.2 Safari/537.36 Chrome-Lighthouse') // false + +// isbot is global isbot(navigator.userAgent) ``` -### Find: Verbose result -Return the respective match for bot user agent rule -```js -isbot.find('Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0 DejaClick/2.9.7.2') // 'DejaClick' +## Additional named imports + +| import | Type | Description | +| ------------- | --------------------------------------------------- | ------------------------------------------------------------------------- | +| pattern | _{RegExp}_ | The regular expression used to identify bots | +| list | _{string[]}_ | List of all individual pattern parts | +| isbotMatch | _{(userAgent: string): string \| null}_ | The substring matched by the regular expression | +| isbotMatches | _{(userAgent: string): string[]}_ | All substrings matched by the regular expression | +| isbotPattern | _{(userAgent: string): string \| null}_ | The regular expression used to identify bot substring in the user agent | +| isbotPatterns | _{(userAgent: string): string[]}_ | All regular expressions used to identify bot substrings in the user agent | +| createIsbot | _{(pattern: RegExp): (userAgent: string): boolean}_ | Create a custom isbot function | + +## Examples + +### Create a custom isbot function ignoring Chrome Lighthouse + +```ts +import { createIsbot, list } from "isbot"; + +const isbot = createIsbot( + new RegExp( + list + .filter((record) => !new RegExp(record, "i").test("Chrome-Lighthouse")) + .join("|"), + "i", + ), +); ``` -### Matches: Get patterns -Return all patterns that match the user agent string -```js -isbot.matches('Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0 SearchRobot/1.0') // ['bot', 'search'] -``` +### Create a custom isbot function including another pattern -### Clear: -Remove all matching patterns so this user agent string will pass -```js -const ua = 'Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0 SearchRobot/1.0'; -isbot(ua) // true -isbot.clear(ua) -isbot(ua) // false -``` +```ts +import { createIsbot, list } from "isbot"; -### Spawn: Create new instances -Create new instances of isbot. Instance is spawned using spawner's list as base -```js -const one = isbot.spawn() -const two = isbot.spawn() - -two.exclude(['chrome-lighthouse']) -one('Chrome-Lighthouse') // true -two('Chrome-Lighthouse') // false -``` -Create isbot using custom list (**instead** of the maintained list) -```js -const lean = isbot.spawn([ 'bot' ]) -lean('Googlebot') // true -lean('Chrome-Lighthouse') // false -``` - -### Get a copy of the Regular Expression pattern -```js -const { pattern } = isbot +const isbot = createIsbot(new RegExp(list.concat("shmulik").join("|"), "i")); ``` ## Definitions -- **Bot.** Autonomous program imitating or replacing some aspect of a human behaviour, performing repetitive tasks much faster than human users could. -- **Good bot.** Automated programs who visit websites in order to collect useful information. Web crawlers, site scrapers, stress testers, preview builders and other programs are welcomed on most websites because they serve purposes of mutual benefits. -- **Bad bot.** Programs which are designed to perform malicious actions, ultimately hurting businesses. Testing credential databases, DDoS attacks, spam bots. + +- **Bot.** Autonomous program imitating or replacing some aspect of a human behaviour, performing repetitive tasks much faster than human users could. +- **Good bot.** Automated programs who visit websites in order to collect useful information. Web crawlers, site scrapers, stress testers, preview builders and other programs are welcomed on most websites because they serve purposes of mutual benefits. +- **Bad bot.** Programs which are designed to perform malicious actions, ultimately hurting businesses. Testing credential databases, DDoS attacks, spam bots. ## Clarifications + ### What does "isbot" do? + This package aims to identify "Good bots". Those who voluntarily identify themselves by setting a unique, preferably descriptive, user agent, usually by setting a dedicated request header. ### What doesn't "isbot" do? + It does not try to recognise malicious bots or programs disguising themselves as real users. ### Why would I want to identify good bots? + Recognising good bots such as web crawlers is useful for multiple purposes. Although it is not recommended to serve different content to web crawlers like Googlebot, you can still elect to -- Flag pageviews to consider with **business analysis**. -- Prefer to serve cached content and **relieve service load**. -- Omit third party solutions' code (tags, pixels) and **reduce costs**. -> It is not recommended to whitelist requests for any reason based on user agent header only. Instead other methods of identification can be added such as [reverse dns lookup](https://www.npmjs.com/package/reverse-dns-lookup). + +- Flag pageviews to consider with **business analysis**. +- Prefer to serve cached content and **relieve service load**. +- Omit third party solutions' code (tags, pixels) and **reduce costs**. + > It is not recommended to whitelist requests for any reason based on user agent header only. Instead other methods of identification can be added such as [reverse dns lookup](https://www.npmjs.com/package/reverse-dns-lookup). ## Data sources We use external data sources on top of our own lists to keep up to date ### Crawlers user agents: -- [user-agents.net](https://user-agents.net/bots) -- [crawler-user-agents repo](https://raw.githubusercontent.com/monperrus/crawler-user-agents/master/crawler-user-agents.json) -- [myip.ms](https://www.myip.ms/files/bots/live_webcrawlers.txt) -- [matomo.org](https://github.com/matomo-org/device-detector/blob/master/Tests/fixtures/bots.yml) -- A Manual list + +- [user-agents.net](https://user-agents.net/bots) +- [crawler-user-agents repo](https://raw.githubusercontent.com/monperrus/crawler-user-agents/master/crawler-user-agents.json) +- [myip.ms](https://www.myip.ms/files/bots/live_webcrawlers.txt) +- [matomo.org](https://github.com/matomo-org/device-detector/blob/master/Tests/fixtures/bots.yml) +- A Manual list ### Non bot user agents: -- [user-agents npm package](https://www.npmjs.com/package/user-agents) -- A Manual list + +- [user-agents npm package](https://www.npmjs.com/package/user-agents) +- A Manual list Missing something? Please [open an issue](https://github.com/omrilotan/isbot/issues/new/choose) ## Major releases breaking changes ([full changelog](./CHANGELOG.md)) +### [**Version 4**](https://github.com/omrilotan/isbot/releases/tag/v4.0.0) + +Remove `isbot` function default export in favour of a named export. + +```ts +import { isbot } from "isbot"; +``` + ### [**Version 3**](https://github.com/omrilotan/isbot/releases/tag/v3.0.0) + Remove testing for node 6 and 8 ### [**Version 2**](https://github.com/omrilotan/isbot/releases/tag/v2.0.0) + Change return value for isbot: `true` instead of matched string ### [**Version 1**](https://github.com/omrilotan/isbot/releases/tag/v1.0.0) + No functional change ## Real world data @@ -155,3 +151,7 @@ No functional change | Execution times in milliseconds | - | ![](https://user-images.githubusercontent.com/516342/125660283-c6ef9db8-6162-449b-912d-7b7ae97ef411.png) + +``` + +``` diff --git a/SECURITY.md b/SECURITY.md index 71033857..ddd53e47 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -6,12 +6,13 @@ The following versions are currently being supported with security updates. This repository is registered for scheduled periodic security scans. -| Version | Supported -| - | - -| >= 3.1 | :white_check_mark: -| < 3.1.0 | :x: +| Version | Supported | +| ------- | ------------------ | +| >= 3.1 | :white_check_mark: | +| < 3.1.0 | :x: | Status cheked with [Snyk](https://security.snyk.io/package/npm/isbot) + ## Reporting a Vulnerability If you discover a vulnerability that affects a version of the software, please [create an issue](https://github.com/omrilotan/isbot/issues/new/choose) to report it. diff --git a/babel.config.json b/babel.config.json deleted file mode 100644 index bec0d0bb..00000000 --- a/babel.config.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "presets": [ - [ - "@babel/preset-env" - ] - ], - "plugins": [ - "@babel/plugin-syntax-import-assertions" - ] -} diff --git a/fixtures/browsers.yml b/fixtures/browsers.yml index a64de007..da39e851 100644 --- a/fixtures/browsers.yml +++ b/fixtures/browsers.yml @@ -361,6 +361,7 @@ Naver Whale: - Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.57 Whale/3.14.133.23 Safari/537.36 NCSA Mosaic: - NCSA_Mosaic/2.7b5 (X11;Linux 2.6.7 i686) libwww/2.12 modified + - NCSA_Mosaic/2.7ck11 (X11;OpenBSD 7.1 i386) libwww/2.12 modified NetFront: - SonyEricssonW810i/R46EA Browser/NetFront/3.3 Profile/MIDP-2.0 Configuration/CLDC-1.1 UP.Link/6.3.0.0.0(Linux LLC 1.2) NetPositive: @@ -683,4 +684,3 @@ ZZZ Glitches and Misidentified Browsers - These browsers are legit user agent ev - Mozilla/5.0 (en-us) AppleWebKit/525.13 (KHTML, like Gecko) Version/3.1 Safari/525.13 - mWebView.getSettings().setUserAgentString(\x22Mozilla/5.0 (Amiga; U; AmigaOS 1.3; en; rv:1.8.1.19); - ozilla/5.0 (Linux; Android 10) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/115.0.5790.166 Mobile DuckDuckGo/5 Safari/537.36 - - "POST /parser Host: user-agents.net action=parse&format=[json|xml]&string=Mozilla%2F5.0%20%28Linux%3B%20Android%2012%29%20AppleWebKit%2F537.36%20%28KHTML%2C%20like%20Gecko%29%20Version%2F4.0%20Chrome%2F96.0.4664.104%20Mobile%20DuckDuckGo%2F5%20Safari%2F537.36" diff --git a/fixtures/crawlers.yml b/fixtures/crawlers.yml index aefce080..f2bebe10 100644 --- a/fixtures/crawlers.yml +++ b/fixtures/crawlers.yml @@ -890,5 +890,6 @@ ZZZ Miscellaneous Glitches and Errornous User Agent Strings: - Mozilla/5.0 (Linux; Android 10; FIG-AL10 Build/HUAWEIFIG-AL10; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/105.0.0.0MQQBrowser/6.2 TBS/045223 Mobile Safari/537.36 MMWEBID/1214 MicroMessenger/7.0.14.1660(0x27000E39) Process/tools NetType/4G Language/zh_CN ABI/arm64 WeChat/arm64 wechatdevtools qcloudcdn-xinan - Mozilla/5.0 (Linux; Android 10; M6 Note Build/N2G47H; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/105.0.0.0MQQBrowser/6.2 TBS/045223 Mobile Safari/537.36 MMWEBID/9551 MicroMessenger/7.0.14.1660(0x27000E37) Process/tools NetType/4G Language/zh_CN ABI/arm64 WeChat/arm64 wechatdevtools qcloudcdn-xinan - pisya + - "POST /parser Host: user-agents.net action=parse&format=[json|xml]&string=Mozilla%2F5.0%20%28Linux%3B%20Android%2012%29%20AppleWebKit%2F537.36%20%28KHTML%2C%20like%20Gecko%29%20Version%2F4.0%20Chrome%2F96.0.4664.104%20Mobile%20DuckDuckGo%2F5%20Safari%2F537.36" - search.marginalia.nu - U2FsdGVkX1+uKxeH2946/bMTDvtm/Fr0nWjvFR/oPtc64LSh1Gg0qkbJUIhLpSw5h/mjF86TFOXrl4U2SG1KBi4BC0EfphyIzeOxVXkpBWHDMfJnkrFGrubrRGjmJNIN49DKkOcjVgq2/iVDBMSAQe30k9wNIDtflfnlrOrmDPkXiYNjLbohSHLaNWS/GK5hu62gkOH25c9i1B+jMq5kc590HoQqJ0o4es9QrEnwluMsYPbQy14LxgPjeCQveiPHPXtkSM7TmfTY53HEJdbFHylstSOJNTQclbL67BKx33M= diff --git a/fixtures/index.ts b/fixtures/index.ts new file mode 100644 index 00000000..3f327b3a --- /dev/null +++ b/fixtures/index.ts @@ -0,0 +1,16 @@ +import UserAgent from "user-agents"; +import data from "./index.json"; + +const browsers: string[] = data.browsers; +const crawlers: string[] = data.crawlers; + +const random = Array(2000) + .fill(null) + .map(() => new UserAgent()) + .map(({ data: { userAgent: ua } }) => ua) + .filter((ua) => !crawlers.includes(ua)) + .filter(Boolean); + +browsers.push(...random.filter((ua) => !crawlers.includes(ua))); + +export { browsers, crawlers }; diff --git a/index.d.ts b/index.d.ts deleted file mode 100644 index f13a447a..00000000 --- a/index.d.ts +++ /dev/null @@ -1,58 +0,0 @@ -/** - * Detect if a user agent is a bot, crawler or spider - * @param {string} ua A user agent string. Non strings will be cast to string before the check - * @returns {boolean} - */ -declare function isbot(ua: any): boolean; - -declare namespace isbot { - /** - * Extend the built-in list of bot user agent - * @param {string[]} filters An array of regular expression patterns - * @returns {void} - */ - function extend(filters: string[]): void; - - /** - * Removes a set of user agent from the built-in list - * This function requires konwnledge of the internal structure of the list - which may change at any time. It is recommended to use the `clear` function instead - * @param {string[]} filters An array of regular expression patterns - * @returns {void} - */ - function exclude(filters: string[]): void; - - /** - * Return the respective match for bot user agent rule - * @param {string} ua A user agent string - * @returns {string|null} - */ - function find(ua: string): string|null; - - /** - * Get the patterns that match user agent string if any - * @param {string} ua User Agent string - * @returns {string[]} - */ - function matches(ua: string): string[]; - - /** - * Clear all patterns that match user agent - * @param {string} ua User Agent string - * @returns {void} - */ - function clear(ua: string): void; - - /** - * Create a new isbot function complete with all its interface - * @param {string[]} list of strings representing regular expression patterns - * @returns isbot function with full interface - */ - function spawn(list?: string[]): typeof isbot; - - /** - * A copy of the regular expression pattern - */ - const pattern: RegExp; -} - -export = isbot; diff --git a/jest.config.js b/jest.config.js new file mode 100644 index 00000000..8d0bcb6c --- /dev/null +++ b/jest.config.js @@ -0,0 +1,4 @@ +module.exports = { + preset: "ts-jest", + testEnvironment: "node", +}; diff --git a/package.json b/package.json index c5695d7e..9cc5bf4f 100644 --- a/package.json +++ b/package.json @@ -1,7 +1,7 @@ { "name": "isbot", - "version": "3.7.1", - "description": "πŸ€– detect bots/crawlers/spiders via the user agent.", + "version": "4.0.0", + "description": "πŸ€– Recognise bots/crawlers/spiders using the user agent string.", "keywords": [ "bot", "crawlers", @@ -18,8 +18,11 @@ "url": "https://github.com/omrilotan/isbot" }, "engines": { - "node": ">=12" + "node": ">=18" }, + "files": [ + "./index*" + ], "type": "commonjs", "main": "./index.js", "module": "./index.mjs", @@ -43,64 +46,27 @@ }, "types": "index.d.ts", "scripts": { - "clean": "rm -rf .cache docs fixtures/downloads fixtures/index.json tests/browser/dist.js tests/esm/node_modules /index.*", "prepare": "./scripts/prepare/index.js", + "build": "./scripts/build/procedure.sh", + "format": "./scripts/format/procedure.sh", + "pretest": "npm run build && npm run prepare", + "test": "node --expose-gc node_modules/.bin/jest --verbose", + "prepublishOnly": "./scripts/prepublish/procedure.sh", "prestart": "which parcel || npm i parcel-bundler --no-save", "start": "parcel page/index.pug --out-dir docs", "prepage": "which parcel || npm i parcel-bundler --no-save", - "pushmodules": "./scripts/pushmodules/index.sh", - "page": "parcel build page/index.pug --out-dir docs --public-url .", - "postpage": "echo isbot.js.org > docs/CNAME", - "authors": "./scripts/authors/index.js", - "sort": "./scripts/sort/index.js", - "ensure": "[ -e \"index.js\" ] || npm run build", - "build": "rollup --config .rollup.js", - "postbuild": "npm run symlink && npm run esm && npm run cjs && npm run browser", - "cjs": "mocha tests/cjs/spec.js", - "esm": "node --experimental-modules ./node_modules/.bin/mocha tests/esm/spec.js", - "pretest": "npm run prepare", - "test": "npm run spec && npm run efficiency", - "prespec": "npm run prepare", - "spec": "node --experimental-modules --experimental-json-modules ./node_modules/.bin/mocha tests/specs/spec.js", - "jest": "cd tests/jest && npm t", - "postjest": "cd ../..", - "preefficiency": "npm run prepare", - "efficiency": "node --expose-gc --experimental-modules --experimental-json-modules ./node_modules/.bin/mocha tests/efficiency/spec.js", - "symlink": "./scripts/symlink/index.js", - "prebrowser": "npm run ensure && npm run symlink && rollup --config tests/browser/rollup.js", - "browser": "karma start tests/browser/karma.js", - "lint": "standard --parser @babel/eslint-parser", - "remark": "remark .", - "prets": "npm run ensure", - "ts": "tsc", - "prepublishOnly": "npm run authors && npm run build" + "page": "parcel build page/index.pug --out-dir docs --public-url ./" }, "devDependencies": { - "@babel/eslint-parser": "^7.19.1", - "@babel/plugin-syntax-import-assertions": "^7.20.0", - "@babel/preset-env": "^7.20.2", - "@lets/wait": "^2.0.2", - "@rollup/plugin-babel": "^6.0.3", - "@rollup/plugin-json": "^6.0.0", - "@rollup/plugin-node-resolve": "^15.0.1", - "acorn-import-assertions": "^1.8.0", - "eslint-plugin-log": "^1.2.6", - "form-data": "^4.0.0", - "karma": "^6.4.1", - "karma-chrome-launcher": "^3.1.1", - "karma-cli": "^2.0.0", - "karma-firefox-launcher": "^2.1.1", - "karma-mocha": "^2.0.1", - "karma-mocha-reporter": "^2.2.5", - "mocha": "^10.2.0", - "pug": "^3.0.0", - "remark-cli": "^11.0.0", - "remark-preset-lint-recommended": "^6.0.0", - "rollup": "^3.10.0", - "standard": "^17.0.0", - "stdline": "^1.0.0", - "typescript": "^5.0.4", - "user-agents": "^1.0.1290", - "yaml": "^2.2.1" + "@types/jest": "^29.5.11", + "esbuild": "^0.19.9", + "jest": "^29.7.0", + "prettier": "^3.1.1", + "pug": "^3.0.2", + "stdline": "^1.1.1", + "ts-jest": "^29.1.1", + "typescript": "^5.3.3", + "user-agents": "^1.1.53", + "yaml": "^2.3.4" } } diff --git a/page/index.pug b/page/index.pug index 3b341706..7fa6abee 100644 --- a/page/index.pug +++ b/page/index.pug @@ -4,8 +4,8 @@ html(lang="en-GB") meta(charset="utf-8") meta(http-equiv="X-UA-Compatible" content="IE=edge, chrome=1") meta(name="viewport" content="width=device-width, initial-scale=1, user-scalable=yes") - meta(name="description" content="Check if user agent string belongs to a bot, crawlers, spiders") - title isbot πŸ€–/πŸ‘¨β€πŸ¦° + meta(name="description" content="πŸ€–/πŸ‘¨β€πŸ¦° Check if user agent string belongs to a bot, crawlers, spiders") + title isbot: Recognise bots/crawlers/spiders using the user agent string link(rel="search" type="application/opensearchdescription+xml" title="isbot check" href="/opensearch.xml") link(rel="author" href="https://github.com/omrilotan/isbot") link(rel="stylesheet" href="./styles.css") diff --git a/page/opensearch.xml b/page/opensearch.xml index cd22f7bf..ea9615de 100644 --- a/page/opensearch.xml +++ b/page/opensearch.xml @@ -1,7 +1,7 @@ isbot - isbot CHECK - isbot CHECK + isbot user agent match + isbot user agent match UTF-8 UTF-8 false diff --git a/page/script.js b/page/script.js index bc87d729..17b9c85e 100644 --- a/page/script.js +++ b/page/script.js @@ -1,87 +1,96 @@ -import isbot from '..' +import { isbot, isbotMatch, isbotPattern } from ".."; { - const textarea = document.querySelector('textarea') - const output = document.querySelector('output') - const copyLink = document.querySelector('[id="copy-link"]') - let timer + const textarea = document.querySelector("textarea"); + const output = document.querySelector("output"); + const copyLink = document.querySelector('[id="copy-link"]'); + let timer; - const url = new URL(window.location.href) - const ua = url.searchParams.get('ua') + const url = new URL(window.location.href); + const ua = url.searchParams.get("ua"); - textarea.childNodes.forEach(child => child.parentNode?.removeChild(child)) - textarea.appendChild(document.createTextNode(ua || navigator.userAgent)) - textarea.addEventListener('keyup', change) - textarea.addEventListener('paste', change) - textarea.addEventListener('focus', () => textarea.select()) - check() + textarea.childNodes.forEach((child) => child.parentNode?.removeChild(child)); + textarea.appendChild(document.createTextNode(ua || navigator.userAgent)); + textarea.addEventListener("keyup", change); + textarea.addEventListener("paste", change); + textarea.addEventListener("focus", () => textarea.select()); + check(); - function change ({ target: { value } }) { - clearTimeout(timer) - timer = setTimeout(check, 200, value) - } + function change({ target: { value } }) { + clearTimeout(timer); + timer = setTimeout(check, 200, value); + } - function append (parent, tag, string) { - if (tag) { - const ele = document.createElement('kbd') - ele.appendChild(document.createTextNode(string)) - parent.appendChild(ele) - } else { - parent.appendChild(document.createTextNode(string)) - } - } + function append(parent, tag, string) { + if (tag) { + const ele = document.createElement("kbd"); + ele.appendChild(document.createTextNode(string)); + parent.appendChild(ele); + } else { + parent.appendChild(document.createTextNode(string)); + } + } - function details (ua) { - const fragment = document.createDocumentFragment() - const is = isbot(ua) - const found = is && isbot.find(ua) - const pattern = found - ? isbot.matches(ua)?.find(pattern => new RegExp(pattern, 'i').test(found)) - : null + function details(ua) { + const fragment = document.createDocumentFragment(); + const is = isbot(ua); + const found = is && isbotMatch(ua); + const pattern = is && isbotPattern(ua); - is - ? append(fragment, null, 'I think so, yes\n') - : append(fragment, null, 'I don\'t think so, no\nI could not find a pattern I recognise') - found && append(fragment, null, 'The substring ') - found && append(fragment, 'kbd', found) - pattern && append(fragment, null, ' matches the pattern ') - pattern && append(fragment, 'kbd', pattern) - return fragment - } + if (is) { + append(fragment, null, "I think so, yes\n"); + append(fragment, null, "The substring "); + append(fragment, "kbd", found); + append(fragment, null, " matches the pattern "); + append(fragment, "kbd", pattern); + } else { + append( + fragment, + null, + "I don't think so, no\nI could not find a pattern I recognise", + ); + } + return fragment; + } - function check (value = textarea.innerHTML) { - value = value.trim() - while (output.firstChild) { - output.removeChild(output.firstChild) - } - if (value === '') { - output.appendChild( - document.createTextNode( - 'Insert user agent string in the text box' - ) - ) - return - } + function check(value = textarea.innerHTML) { + value = value.trim(); + while (output.firstChild) { + output.removeChild(output.firstChild); + } + if (value === "") { + output.appendChild( + document.createTextNode("Insert user agent string in the text box"), + ); + return; + } - output.appendChild(details(value)) - } + output.appendChild(details(value)); + } - copyLink.addEventListener('click', (event) => { - event.preventDefault() - const { protocol, host, pathname } = document.location - navigator.clipboard.writeText([ - protocol, '//', host, pathname, '?ua=', encodeURIComponent(textarea.value) - ].join('')) - const dialog = document.createElement('dialog') - dialog.appendChild(document.createTextNode('copied to clipboard')) - document.body.appendChild(dialog) - dialog.showModal() - setTimeout(() => { - dialog.addEventListener('transitionend', () => { - dialog.close() - document.body.removeChild(dialog) - }) - dialog.style.opacity = 0 - }, 1000) - }) + copyLink.addEventListener("click", (event) => { + event.preventDefault(); + const { protocol, host, pathname } = document.location; + navigator.clipboard.writeText( + [ + protocol, + "//", + host, + pathname, + "?ua=", + encodeURIComponent(textarea.value), + ].join(""), + ); + const dialog = document.createElement("dialog"); + dialog.appendChild(document.createTextNode("copied to clipboard")); + document.body.appendChild(dialog); + dialog.showModal(); + setTimeout(() => { + dialog.addEventListener("transitionend", () => { + dialog.close(); + document.body.removeChild(dialog); + }); + dialog.style.opacity = 0; + }, 1000); + }); } diff --git a/page/styles.css b/page/styles.css index 613e6476..db9deea9 100644 --- a/page/styles.css +++ b/page/styles.css @@ -1,117 +1,134 @@ :root { - --color-background: #e5e5e5; - --color-background-middle: #eee; - --color-background-highlight: #fff; - --color-foreground: #333; - --color-foreground-shadow: #0d275041; + --color-background: #e5e5e5; + --color-background-middle: #eee; + --color-background-highlight: #fff; + --color-foreground: #333; + --color-foreground-shadow: #0d275041; } * { - margin: 0; - padding: 0; + margin: 0; + padding: 0; } -html, textarea, button { - font: 100 100%/1.4 -apple-system, "BlinkMacSystemFont", "Helvetica Neue", "Helvetica", "Lucida Grande", "Arial", sans-serif; +html, +textarea, +button { + font: + 100 100%/1.4 -apple-system, + "BlinkMacSystemFont", + "Helvetica Neue", + "Helvetica", + "Lucida Grande", + "Arial", + sans-serif; } html, body { - min-height: 100%; - display: flex; - justify-content: center; - align-items: center; + min-height: 100%; + display: flex; + justify-content: center; + align-items: center; } -body, textarea { - background: var(--color-background); - color: var(--color-foreground); - text-shadow: var(--color-background-highlight) 1px 1px 0; +body, +textarea { + background: var(--color-background); + color: var(--color-foreground); + text-shadow: var(--color-background-highlight) 1px 1px 0; } body { - background: linear-gradient(145deg, var(--color-background-highlight), var(--color-background-middle), var(--color-background)); + background: linear-gradient( + 145deg, + var(--color-background-highlight), + var(--color-background-middle), + var(--color-background) + ); } form { - width: 90vw; - max-width: 1400px; - padding: 1em 0; - text-align: center; + width: 90vw; + max-width: 1400px; + padding: 1em 0; + text-align: center; } h1, textarea, output { - margin-bottom: 1em; + margin-bottom: 1em; } h1 { - font-weight: 700; - font-size: 3em; + font-weight: 700; + font-size: 3em; } textarea { - font-size: 1.7em; - width: 100%; - height: 5em; - padding: .5em; - resize: vertical; - box-sizing: border-box; + font-size: 1.7em; + width: 100%; + height: 5em; + padding: 0.5em; + resize: vertical; + box-sizing: border-box; } textarea { - border-radius: .5em; - box-shadow: inset var(--color-foreground-shadow) .5em .5em .5em, - inset var(--color-background-highlight) -.5em -.5em .5em; - border-width: 0; + border-radius: 0.5em; + box-shadow: + inset var(--color-foreground-shadow) 0.5em 0.5em 0.5em, + inset var(--color-background-highlight) -0.5em -0.5em 0.5em; + border-width: 0; } output { - display: block; - white-space: pre-line; - font-size: 2em; + display: block; + white-space: pre-line; + font-size: 2em; } kbd { - display: inline-block; - background: var(--color-background-middle); - font-size: 0.8em; - padding: 0 0.2em; + display: inline-block; + background: var(--color-background-middle); + font-size: 0.8em; + padding: 0 0.2em; } button { - font-size: 1.1em; - padding: 0.5em 1em; - border-radius: 0.5em; - border-width: 0; - color: var(--color-foreground); - text-shadow: var(--color-background-highlight) 1px 1px 0; - background: var(--color-background); - color: var(--color-foreground); - box-shadow: var(--color-foreground-shadow) .1em .1em .1em, - inset var(--color-background-highlight) .1em .1em .1em, - inset var(--color-foreground-shadow) -.1em -.1em .1em; - cursor: pointer; - margin: 0 0 1em; + font-size: 1.1em; + padding: 0.5em 1em; + border-radius: 0.5em; + border-width: 0; + color: var(--color-foreground); + text-shadow: var(--color-background-highlight) 1px 1px 0; + background: var(--color-background); + color: var(--color-foreground); + box-shadow: + var(--color-foreground-shadow) 0.1em 0.1em 0.1em, + inset var(--color-background-highlight) 0.1em 0.1em 0.1em, + inset var(--color-foreground-shadow) -0.1em -0.1em 0.1em; + cursor: pointer; + margin: 0 0 1em; } dialog { - position: fixed; - top: 10vh; - left: 40vw; - width: 20vw; - padding: 1em; - background: var(--color-foreground); - color: var(--color-background-highlight); - text-shadow: none; - text-align: center; - border-radius: 1em; - opacity: 0; - transition: opacity .5s ease-in-out; - box-shadow: var(--color-foreground-shadow) 0 0 0 .5em; + position: fixed; + top: 10vh; + left: 40vw; + width: 20vw; + padding: 1em; + background: var(--color-foreground); + color: var(--color-background-highlight); + text-shadow: none; + text-align: center; + border-radius: 1em; + opacity: 0; + transition: opacity 0.5s ease-in-out; + box-shadow: var(--color-foreground-shadow) 0 0 0 0.5em; } dialog[open] { - opacity: 1; + opacity: 1; } diff --git a/scripts/authors/index.js b/scripts/authors/index.js index afde5dc3..3ed6ad8a 100755 --- a/scripts/authors/index.js +++ b/scripts/authors/index.js @@ -1,29 +1,36 @@ #!/usr/bin/env node -const { promises: { writeFile } } = require('fs') -const { join } = require('path') -const client = require('../lib/client') -const sortBy = require('./sortBy') +import { writeFile } from "node:fs/promises"; -start() +/** + * sortBy Sort a list of objects by the value of a key + * @param {object[]} list + * @param {string} key + * @returns {object[]} + */ +const sortBy = (list, key) => + list.sort(function (a, b) { + const [_a, _b] = [a, b].map((i) => i[key]); + if (_a < _b) return 1; + if (_a > _b) return -1; + return 0; + }); -async function start () { - const response = await client({ - url: 'https://api.github.com/repos/omrilotan/isbot/contributors', - headers: { - 'Content-Type': 'application/json', - 'User-Agent': 'omrilotan/isbot' - } - }) +start(); - const contributors = sortBy( - JSON.parse(response), - 'contributions').map( - ({ login, html_url: url }) => `${login} (${url})\n` - ).join('') +async function start() { + const response = await fetch( + "https://api.github.com/repos/omrilotan/isbot/contributors", + { + headers: new Headers([ + ["Content-Type", "application/json"], + ["User-Agent", "omrilotan/isbot"], + ]), + }, + ); + const contributors = sortBy(await response.json(), "contributions") + .map(({ login, html_url: url }) => `${login} (${url})`) + .join("\n"); - await writeFile( - join(__dirname, '..', '..', 'AUTHORS'), - contributors - ) + await writeFile("AUTHORS", contributors); } diff --git a/scripts/authors/sortBy/index.js b/scripts/authors/sortBy/index.js deleted file mode 100644 index 09e9116f..00000000 --- a/scripts/authors/sortBy/index.js +++ /dev/null @@ -1,14 +0,0 @@ -/** - * sortBy Sort a list of objects by the value of a key - * @param {object[]} list - * @param {string} key - * @returns {object[]} - */ -module.exports = (list, key) => list.sort( - function (a, b) { - const [_a, _b] = [a, b].map(i => i[key]) - if (_a < _b) return 1 - if (_a > _b) return -1 - return 0 - } -) diff --git a/scripts/build/pattern.js b/scripts/build/pattern.js new file mode 100755 index 00000000..8bc2a699 --- /dev/null +++ b/scripts/build/pattern.js @@ -0,0 +1,12 @@ +#!/usr/bin/env node + +import { writeFile } from "node:fs/promises"; +import patterns from "../../src/patterns.json" assert { type: "json" }; + +const pattern = new RegExp(patterns.join("|"), "i").toString(); +const code = ` +export const regex: RegExp = ${pattern}; +export const parts: number = ${patterns.length}; +export const size: number = ${pattern.length}; +`.trim(); +await writeFile("src/pattern.ts", code); diff --git a/scripts/build/procedure.sh b/scripts/build/procedure.sh new file mode 100755 index 00000000..164b63e3 --- /dev/null +++ b/scripts/build/procedure.sh @@ -0,0 +1,26 @@ +#!/usr/bin/env bash + +failures=0 + +echo "β†’ Build Regular Expression" +scripts/build/pattern.js +failures=$((failures + $?)) + +echo "β†’ Build commonjs" +esbuild src/index.ts --outfile=index.js --bundle --platform=neutral --format=cjs --log-level=warning +failures=$((failures + $?)) + +echo "β†’ Build esm" +esbuild src/index.ts --outfile=index.mjs --bundle --platform=neutral --format=esm --log-level=warning +failures=$((failures + $?)) + +echo "β†’ Build browser file (iife)" +esbuild src/browser.ts --outfile=index.iife.js --bundle --platform=neutral --format=iife --global-name=isbot --log-level=warning +failures=$((failures + $?)) + +echo "β†’ Build TypeScript declaration file" +tsc src/index.ts --declaration --emitDeclarationOnly --resolveJsonModule --esModuleInterop --outDir . +failures=$((failures + $?)) + +echo -e "β†’ Number of failures: ${failures}" +exit $failures diff --git a/scripts/format/procedure.sh b/scripts/format/procedure.sh new file mode 100755 index 00000000..94f180bc --- /dev/null +++ b/scripts/format/procedure.sh @@ -0,0 +1,14 @@ +#!/usr/bin/env bash + +failures=0 + +echo "β†’ Sort lists" +./scripts/sort/index.js +failures=$((failures + $?)) + +echo "β†’ Format files" +prettier --write . +failures=$((failures + $?)) + +echo "β†’ Number of failures: ${failures}" +exit $failures diff --git a/scripts/gh-pages/procedure.sh b/scripts/gh-pages/procedure.sh new file mode 100755 index 00000000..eb232be6 --- /dev/null +++ b/scripts/gh-pages/procedure.sh @@ -0,0 +1,27 @@ +message="$(curl -s https://whatthecommit.com/index.txt)" +git config --global user.name "$(git show -s --format=%an)" +git config --global user.email "$(git show -s --format=%ae)" + +origin=$(git config --get remote.origin.url) +exists=$(git ls-remote --heads ${origin} gh-pages) + +if [ -z "$exists" ]; then + mkdir -p GHPAGES_DIR + cp -r .git GHPAGES_DIR + cd GHPAGES_DIR + git checkout -b gh-pages +else + git clone -b gh-pages --single-branch $origin GHPAGES_DIR + cd GHPAGES_DIR +fi + +ls | grep -v CNAME | xargs rm -rf +cd ../ + +cp -R ./docs ./GHPAGES_DIR + +cd GHPAGES_DIR +git add . +git commit -m "$message" +git push origin gh-pages +cd ../ diff --git a/scripts/lib/client/index.js b/scripts/lib/client/index.js deleted file mode 100644 index 5d2d9d26..00000000 --- a/scripts/lib/client/index.js +++ /dev/null @@ -1,75 +0,0 @@ -const { request } = require('https') -const FormData = require('form-data') - -/** - * Simple HTTP client implementation - * @param {string} ΓΈ.url - * @param {string} [ΓΈ.method='GET'] - * @param {object} [data] - * @param {object} [headers={}] - * @returns {Promise} response body - */ -module.exports = function client ({ url, method = 'GET', data, headers = {} } = {}) { - const { hostname, pathname, search } = new URL(url) - const path = [pathname, search].join('') - return new Promise( - (resolve, reject) => { - try { - const form = data - ? new FormData() - : null - - form && Object.entries(data).forEach( - entry => form.append(...entry) - ) - - const instance = request( - { - hostname, - port: 443, - path, - method, - headers: Object.assign( - (form && form.getHeaders()) || {}, - headers - ) - }, - response => { - if (Math.floor(response.statusCode / 100) !== 2) { - const error = new Error('Failed request') - error.url = url - error.status = [response.statusCode, response.statusText].filter(Boolean).join(' ') - error.code = response.statusCode - error.headers = JSON.stringify(response.headers) - reject(error) - return - } - - const chunks = [] - - response.on( - 'data', - chunk => chunks.push(chunk) - ) - response.on( - 'end', - () => resolve( - chunks.map( - chunk => chunk.toString() - ).join('') - ) - ) - } - ).on( - 'error', - reject - ) - - form && form.pipe(instance) - instance.end() - } catch (error) { - reject(error) - } - } - ) -} diff --git a/scripts/lib/dedup/index.js b/scripts/lib/dedup/index.js deleted file mode 100644 index 863468f5..00000000 --- a/scripts/lib/dedup/index.js +++ /dev/null @@ -1,6 +0,0 @@ -/** - * Create array without the duplicates - * @param {Array} list - * @return {Array} - */ -module.exports = list => Array.from(new Set(list)) diff --git a/scripts/lib/exists/index.js b/scripts/lib/exists/index.js index 74e8136d..46af1ce5 100644 --- a/scripts/lib/exists/index.js +++ b/scripts/lib/exists/index.js @@ -1,14 +1,14 @@ -const { promises: { stat } } = require('fs') +import { stat } from "node:fs/promises"; /** * @param {string} path Path to file * @returns {boolean} */ -module.exports = async function exists (path) { - try { - const stats = await stat(path) - return stats.isFile() || stats.isSymbolicLink() || stats.isDirectory() - } catch (error) { - return false - } +export async function exists(path) { + try { + const stats = await stat(path); + return stats.isFile() || stats.isSymbolicLink() || stats.isDirectory(); + } catch (error) { + return false; + } } diff --git a/scripts/lib/write/index.js b/scripts/lib/write/index.js deleted file mode 100644 index 7742d9ba..00000000 --- a/scripts/lib/write/index.js +++ /dev/null @@ -1,15 +0,0 @@ -const { promises: { writeFile } } = require('fs') - -/** - * Write JSON file - * @param {string} path - * @param {any} content - * @param {boolean} [stringify=true] - * @returns {Promise} - */ -module.exports = (destination, content, { stringify = true } = {}) => writeFile( - destination, - stringify - ? JSON.stringify(content, null, 2) + '\n' - : content.toString() -) diff --git a/src/package.json b/scripts/package.json similarity index 100% rename from src/package.json rename to scripts/package.json diff --git a/scripts/prepare/args/index.js b/scripts/prepare/args/index.js index a05a0231..2d2a1ca2 100644 --- a/scripts/prepare/args/index.js +++ b/scripts/prepare/args/index.js @@ -3,8 +3,8 @@ * @param {string[]} ΓΈ.argv * @returns {object} */ -module.exports = function args ({ argv }) { - const force = argv.includes('-f') || argv.includes('--force') +export function args({ argv }) { + const force = argv.includes("-f") || argv.includes("--force"); - return { force } + return { force }; } diff --git a/scripts/prepare/build/index.js b/scripts/prepare/build/index.js index 6ca46616..9e3038e5 100644 --- a/scripts/prepare/build/index.js +++ b/scripts/prepare/build/index.js @@ -1,108 +1,68 @@ -const { promises: { readdir, readFile } } = require('fs') -const { join } = require('path') -const { parse } = require('yaml') -const UserAgent = require('user-agents') -const dedup = require('../../lib/dedup') - -module.exports = async function build ({ fixturesDirectory, downloadsDirectory }) { - return { - browsers: dedup(await browsers({ fixturesDirectory, downloadsDirectory })), - crawlers: dedup(await crawlers({ fixturesDirectory, downloadsDirectory })) - } -} +import { readdir, readFile } from "node:fs/promises"; +import { join } from "path"; +import { parse } from "yaml"; /** - * List of web browsers user agent strings + * Return the values of objects in our YAML lists + * @param {string} path File path * @returns {string[]} */ -async function browsers ({ fixturesDirectory, downloadsDirectory }) { - const browsers = await readYaml(join(fixturesDirectory, 'browsers.yml')) - - const knownCrawlers = await crawlers({ fixturesDirectory, downloadsDirectory }) - - // Generate a random list of unique user agent strings - const random = Array(2000) - .fill() - .map( - () => new UserAgent() - ) - .map( - wrap(({ data: { userAgent: ua } }) => ua) - ) - .filter( - wrap(ua => !knownCrawlers.includes(ua)) - ) - .filter( - Boolean - ) - - return browsers.concat(random) -} +const readFixturesYaml = async (path) => + Object.values(parse((await readFile(path)).toString())).flat(); /** - * List of known crawlers user agent strings - * @returns {string[]} + * Build the lists of user agent strings + * @param {string} fixturesDirectory + * @param {string} downloadsDirectory + * @returns {Promise<{browsers: string[], crawlers: string[]}> */ -async function crawlers ({ fixturesDirectory, downloadsDirectory }) { - const crawlers = await readYaml(join(fixturesDirectory, 'crawlers.yml')) - const browsers = await readYaml(join(fixturesDirectory, 'browsers.yml')) - const downloadedFiles = await readdir(downloadsDirectory) - const downloaded = downloadedFiles.filter( - wrap(file => file.endsWith('.json')) - ).map( - wrap(file => require(join(downloadsDirectory, file))) - ).flat() - - return crawlers.concat(downloaded).filter( - wrap(ua => !ua.startsWith('#')) - ).filter( - wrap(ua => !/ucweb|cubot/i.test(ua)) // I don't know why it's in so many crawler lists - ).filter( - wrap(ua => !browsers.includes(ua)) - ).filter( - wrap(ua => ua.length < 4e3) - ) +export async function build({ fixturesDirectory, downloadsDirectory }) { + return { + browsers: Array.from(new Set(await browsers({ fixturesDirectory }))).sort(), + crawlers: Array.from( + new Set(await crawlers({ fixturesDirectory, downloadsDirectory })), + ).sort(), + }; } /** - * Return the values of objects in our YAML lists - * @param {string} path File path + * List of web browsers user agent strings + * @param {string} fixturesDirectory * @returns {string[]} */ -async function readYaml (path) { - const content = await readFile(path) - return Object.values( - parse( - content.toString() - ) - ).flat() -} - -/** - * Wrap a filter function to add arguments to error messages - * @param {Function} fn - * @returns {Function} - */ -function wrap (fn) { - return function () { - try { - return fn.apply(this, arguments) - } catch (error) { - error.message = [error.message, stringify(arguments)].join(': ') - throw error - } - } +async function browsers({ fixturesDirectory }) { + return await readFixturesYaml(join(fixturesDirectory, "browsers.yml")); } /** - * Stringify an array of arguments - * @param {any[]} array - * @returns + * List of known crawlers user agent strings + * @param {string} fixturesDirectory + * @param {string} downloadsDirectory + * @returns {string[]} */ -function stringify (array) { - try { - return JSON.stringify(array).substring(0, 100) - } catch (error) { - return array.map(item => `${item}`).join(', ').substring(0, 100) - } +async function crawlers({ fixturesDirectory, downloadsDirectory }) { + const crawlers = await readFixturesYaml( + join(fixturesDirectory, "crawlers.yml"), + ); + const browsersList = await browsers({ fixturesDirectory }); + const downloaded = []; + for (const file of await readdir(downloadsDirectory)) { + if (!file.endsWith(".json")) { + continue; + } + try { + const content = await readFile(join(downloadsDirectory, file)); + downloaded.push(...JSON.parse(content.toString())); + } catch (error) { + // Ignore + } + } + return crawlers + .concat(downloaded.flat()) + .filter((ua) => !ua.startsWith("#")) // Remove comments + .filter( + (ua = "") => !/ucweb|cubot/i.test(ua), // I don't know why it's in so many crawler lists + ) + .filter((ua) => !browsersList.includes(ua)) // Remove browsers manually added to browsers.yml + .filter((ua = "") => ua.length < 4e3); // Remove very long user agent strings } diff --git a/scripts/prepare/externals/index.js b/scripts/prepare/externals/index.js index 28d5dafd..4949f543 100644 --- a/scripts/prepare/externals/index.js +++ b/scripts/prepare/externals/index.js @@ -1,17 +1,17 @@ -const { join } = require('path') -const { parse } = require('yaml') -const client = require('../../lib/client') -const exists = require('../../lib/exists') -const write = require('../../lib/write') +import { writeFile } from "node:fs/promises"; +import { join } from "node:path"; +import { parse } from "yaml"; +import { exists } from "../../lib/exists/index.js"; -const { log } = console +const { log } = console; -module.exports.download = ({ dir, force = false } = {}) => Promise.all([ - monperrus({ dir, force }), - matomoOrg({ dir, force }), - userAgentsNet({ dir, force }), - myipMs({ dir, force }) -]) +export const download = ({ dir, force = false } = {}) => + Promise.all([ + monperrus({ dir, force }), + matomoOrg({ dir, force }), + userAgentsNet({ dir, force }), + myipMs({ dir, force }), + ]); /** * Read remote file and create JSON list locally @@ -19,20 +19,20 @@ module.exports.download = ({ dir, force = false } = {}) => Promise.all([ * @param {boolean} [ΓΈ.force] Read even if file exists * @returns {Promise} */ -async function monperrus ({ dir = join(__dirname, '..'), force = false } = {}) { - const destination = join(dir, 'monperrus.json') - if (!force && await exists(destination)) { - log(`Skip ${destination} - Already exists.`) - return 0 - } - log(`Download content for ${destination}`) - const response = await client({ url: 'https://raw.githubusercontent.com/monperrus/crawler-user-agents/master/crawler-user-agents.json' }) - const list = JSON.parse(response).map( - ({ instances }) => instances - ).flat() - log(`Write ${destination}`) - await write(destination, list) - return 1 +async function monperrus({ dir = join(__dirname, ".."), force = false } = {}) { + const destination = join(dir, "monperrus.json"); + if (!force && (await exists(destination))) { + log(`Skip ${destination} - Already exists.`); + return 0; + } + log(`Download content for ${destination}`); + const response = await fetch( + "https://raw.githubusercontent.com/monperrus/crawler-user-agents/master/crawler-user-agents.json", + ); + const list = (await response.json()).map(({ instances }) => instances).flat(); + log(`Write ${destination}`); + await writeFile(destination, JSON.stringify(list, null, 2) + "\n"); + return 1; } /** @@ -41,20 +41,22 @@ async function monperrus ({ dir = join(__dirname, '..'), force = false } = {}) { * @param {boolean} [ΓΈ.force] Read even if file exists * @returns {Promise} */ -async function matomoOrg ({ dir = join(__dirname, '..'), force = false } = {}) { - const destination = join(dir, 'matomo-org.json') - if (!force && await exists(destination)) { - log(`Skip ${destination} - Already exists.`) - return 0 - } - log(`Download content for ${destination}`) - const response = await client({ url: 'https://raw.githubusercontent.com/matomo-org/device-detector/master/Tests/fixtures/bots.yml' }) - const list = parse(response).map( - ({ user_agent }) => user_agent // eslint-disable-line camelcase - ) - log(`Write ${destination}`) - await write(destination, list) - return 1 +async function matomoOrg({ dir = join(__dirname, ".."), force = false } = {}) { + const destination = join(dir, "matomo-org.json"); + if (!force && (await exists(destination))) { + log(`Skip ${destination} - Already exists.`); + return 0; + } + log(`Download content for ${destination}`); + const response = await fetch( + "https://raw.githubusercontent.com/matomo-org/device-detector/master/Tests/fixtures/bots.yml", + ); + const list = parse(await response.text()).map( + ({ user_agent }) => user_agent, // eslint-disable-line camelcase + ); + log(`Write ${destination}`); + await writeFile(destination, JSON.stringify(list, null, 2) + "\n"); + return 1; } /** @@ -63,27 +65,33 @@ async function matomoOrg ({ dir = join(__dirname, '..'), force = false } = {}) { * @param {boolean} [ΓΈ.force] Read even if file exists * @returns {Promise} */ -async function userAgentsNet ({ dir = join(__dirname, '..'), force = false } = {}) { - const destination = join(dir, 'user-agents.net.json') - if (!force && await exists(destination)) { - log(`Skip ${destination} - Already exists.`) - return 0 - } - log(`Download content for ${destination}`) - const response = await client( - { - url: 'https://user-agents.net/download', - method: 'POST', - data: { - browser_type: 'bot-crawler', - download: 'json' - } - } - ) - const list = JSON.parse(response) - log(`Write ${destination}`) - await write(destination, list) - return 1 +async function userAgentsNet({ + dir = join(__dirname, ".."), + force = false, +} = {}) { + const destination = join(dir, "user-agents.net.json"); + if (!force && (await exists(destination))) { + log(`Skip ${destination} - Already exists.`); + return 0; + } + log(`Download content for ${destination}`); + const response = await fetch("https://user-agents.net/download", { + method: "POST", + body: [ + ["browser_type", "bot-crawler"], + ["download", "json"], + ] + .map((entry) => entry.join("=")) + .join("&"), + headers: new Headers([ + ["Content-Type", "application/x-www-form-urlencoded"], + ["User-Agent", "omrilotan/isbot"], + ]), + }); + const list = await response.json(); + log(`Write ${destination}`); + await writeFile(destination, JSON.stringify(list, null, 2) + "\n"); + return 1; } /** @@ -92,22 +100,21 @@ async function userAgentsNet ({ dir = join(__dirname, '..'), force = false } = { * @param {boolean} [ΓΈ.force] Read even if file exists * @returns {Promise} */ -async function myipMs ({ dir = join(__dirname, '..'), force = false } = {}) { - const destination = join(dir, 'myip.ms.json') - if (!force && await exists(destination)) { - log(`Skip ${destination} - Already exists.`) - return 0 - } - log(`Download content for ${destination}`) - const response = await client({ url: 'https://myip.ms/files/bots/live_webcrawlers.txt' }) - const list = response.split( - '\n' - ).map( - line => line.split('records - ')[1] - ).filter( - Boolean - ) - log(`Write ${destination}`) - await write(destination, list) - return 1 +async function myipMs({ dir = join(__dirname, ".."), force = false } = {}) { + const destination = join(dir, "myip.ms.json"); + if (!force && (await exists(destination))) { + log(`Skip ${destination} - Already exists.`); + return 0; + } + log(`Download content for ${destination}`); + const response = await fetch( + "https://myip.ms/files/bots/live_webcrawlers.txt", + ); + const list = (await response.text()) + .split("\n") + .map((line) => line.split("records - ")[1]) + .filter(Boolean); + log(`Write ${destination}`); + await writeFile(destination, JSON.stringify(list, null, 2) + "\n"); + return 1; } diff --git a/scripts/prepare/index.js b/scripts/prepare/index.js index 29ba041a..eed667ab 100755 --- a/scripts/prepare/index.js +++ b/scripts/prepare/index.js @@ -1,47 +1,48 @@ #!/usr/bin/env node -const { promises: { mkdir, writeFile } } = require('fs') -const { join } = require('path') -const write = require('../lib/write') -const args = require('./args') -const build = require('./build') -const { download } = require('./externals') +import { mkdir, writeFile } from "node:fs/promises"; +import { join } from "path"; +import { args } from "./args/index.js"; +import { build } from "./build/index.js"; +import { download } from "./externals/index.js"; -const { log } = console +const { log } = console; /** * scripts/prepare.js [-f] [--force] */ -start(process) +start(process); /** * Run this script * @paran {string[]} ΓΈ.argv * @returns {void} */ -async function start ({ argv }) { - const { force } = args({ argv }) - const fixturesDirectory = join(__dirname, '..', '..', 'fixtures') - const downloadsDirectory = join(fixturesDirectory, 'downloads') +async function start({ argv }) { + const { force } = args({ argv }); + const fixturesDirectory = join("fixtures"); + const downloadsDirectory = join(fixturesDirectory, "downloads"); - await mkdir(downloadsDirectory, { recursive: true }) - const results = await download({ dir: downloadsDirectory, force }) - const news = results.reduce((a, b) => a + b) - if (news) { - log('Create new timestamp') - await write( - join(downloadsDirectory, 'downloaded'), - new Date().toUTCString(), - { stringify: false } - ) - } else { - log('No new files were downloaded') - } + await mkdir(downloadsDirectory, { recursive: true }); + const results = await download({ dir: downloadsDirectory, force }); + const news = results.reduce((a, b) => a + b); + if (news) { + log("Create new timestamp"); + await writeFile( + join(downloadsDirectory, "downloaded"), + new Date().toUTCString(), + ); + } else { + log("No new files were downloaded"); + } - log('Create fixtures JSON') - const { browsers, crawlers } = await build({ fixturesDirectory, downloadsDirectory }) - await writeFile( - join(fixturesDirectory, 'index.json'), - JSON.stringify({ browsers, crawlers }, null, 2) + '\n' - ) -}; + log("Create fixtures JSON"); + const { browsers, crawlers } = await build({ + fixturesDirectory, + downloadsDirectory, + }); + await writeFile( + join(fixturesDirectory, "index.json"), + JSON.stringify({ browsers, crawlers }, null, 2) + "\n", + ); +} diff --git a/scripts/prepublish/procedure.sh b/scripts/prepublish/procedure.sh new file mode 100755 index 00000000..01f75011 --- /dev/null +++ b/scripts/prepublish/procedure.sh @@ -0,0 +1,18 @@ +#!/usr/bin/env bash + +failures=0 + +if [ -e "index.js" ]; then + echo "β†’ Build files found, skip build" +else + echo "β†’ Build" + npm run build +fi +failures=$((failures + $?)) + +echo "β†’ Create AUTHORS file" +./scripts/authors/index.js +failures=$((failures + $?)) + +echo -e "β†’ Number of failures: ${failures}" +exit $failures diff --git a/scripts/pushmodules/index.sh b/scripts/pushmodules/index.sh deleted file mode 100755 index 48da1fbf..00000000 --- a/scripts/pushmodules/index.sh +++ /dev/null @@ -1,5 +0,0 @@ -#!/usr/bin/env bash - -git submodule foreach git add . -git submodule foreach git commit -m $(date +"%Y-%m-%d") -git submodule foreach git push origin downloads --force-with-lease diff --git a/scripts/sort/dedup/index.js b/scripts/sort/dedup/index.js deleted file mode 100644 index d6ee9007..00000000 --- a/scripts/sort/dedup/index.js +++ /dev/null @@ -1,6 +0,0 @@ -/** - * Create array without the duplicates - * @param {any[]} list - * @return {any[]} - */ -module.exports = list => Array.from(new Set(list)) diff --git a/scripts/sort/downcase/index.js b/scripts/sort/downcase/index.js deleted file mode 100644 index 39c35096..00000000 --- a/scripts/sort/downcase/index.js +++ /dev/null @@ -1,6 +0,0 @@ -/** - * Return a lowercase copy - * @param {string} str - * @returns {string} - */ -module.exports = str => str.toLowerCase() diff --git a/scripts/sort/index.js b/scripts/sort/index.js index fb38d17b..5ef65739 100755 --- a/scripts/sort/index.js +++ b/scripts/sort/index.js @@ -1,27 +1,27 @@ #!/usr/bin/env node -const { join } = require('path') -const sortJSON = require('./sortJSON') -const sortYamlFile = require('./sortYamlFile') +import { join } from "node:path"; +import { sortJSON } from "./sortJSON/index.js"; +import { sortYamlFile } from "./sortYamlFile/index.js"; -start() +start(); -async function start () { - const errors = [] +async function start() { + const errors = []; - async function call (fn, ...args) { - try { - await fn.apply(this, args) - } catch (error) { - errors.push(error) - } - } + async function call(fn, ...args) { + try { + await fn.apply(this, args); + } catch (error) { + errors.push(error); + } + } - await Promise.all([ - call(sortYamlFile, join(__dirname, '..', '..', 'fixtures', 'crawlers.yml')), - call(sortYamlFile, join(__dirname, '..', '..', 'fixtures', 'browsers.yml')), - call(sortJSON, join(__dirname, '..', '..', 'src', 'list.json')) - ]) - errors.forEach(error => console.error(error)) - process.exitCode = errors.length + await Promise.all([ + call(sortYamlFile, join("fixtures", "crawlers.yml")), + call(sortYamlFile, join("fixtures", "browsers.yml")), + call(sortJSON, join("src", "patterns.json")), + ]); + errors.forEach((error) => console.error(error)); + process.exitCode = errors.length; } diff --git a/scripts/sort/sort/index.js b/scripts/sort/sort/index.js index 64585c9b..0e012031 100644 --- a/scripts/sort/sort/index.js +++ b/scripts/sort/sort/index.js @@ -1,14 +1,12 @@ -const downcase = require('../downcase') - /** * Case insensitive Sort * @param {string} a * @param {string} b * @returns {number} */ -module.exports = function sort (a, b) { - a = downcase(a) - b = downcase(b) +export function sort(a, b) { + a = a.toLowerCase(); + b = b.toLowerCase(); - return a > b ? 1 : b > a ? -1 : 0 + return a > b ? 1 : b > a ? -1 : 0; } diff --git a/scripts/sort/sortJSON/index.js b/scripts/sort/sortJSON/index.js index 8bfbf5f7..c7efa5c1 100644 --- a/scripts/sort/sortJSON/index.js +++ b/scripts/sort/sortJSON/index.js @@ -1,21 +1,15 @@ -const { promises: { writeFile } } = require('fs') -const dedup = require('../dedup') -const sort = require('../sort') +import { readFile, writeFile } from "node:fs/promises"; +import { sort } from "../sort/index.js"; /** * Read, sort, and save JSON file * @param {string} filepath * @returns {Promise} */ -module.exports = async function sortJSON (filepath) { - const list = require(filepath) - - await writeFile( - filepath, - JSON.stringify( - dedup(list).sort(sort), - null, - 2 - ) + '\n' - ) +export async function sortJSON(filepath) { + const list = JSON.parse((await readFile(filepath)).toString()); + await writeFile( + filepath, + JSON.stringify(Array.from(new Set(list)).sort(sort), null, 2) + "\n", + ); } diff --git a/scripts/sort/sortYamlFile/index.js b/scripts/sort/sortYamlFile/index.js index be490303..ec9ed5d4 100644 --- a/scripts/sort/sortYamlFile/index.js +++ b/scripts/sort/sortYamlFile/index.js @@ -1,38 +1,35 @@ -const { promises: { readFile, writeFile } } = require('fs') -const YAML = require('yaml') -const dedup = require('../dedup') -const downcase = require('../downcase') -const sort = require('../sort') +import { readFile, writeFile } from "node:fs/promises"; +import YAML from "yaml"; +import { sort } from "../sort/index.js"; /** * Read, sort, and save Yaml file * @param {String} filepath * @return {Promise} */ -module.exports = async function sortYamlFile (filepath) { - const content = (await readFile(filepath)).toString() - const data = YAML.parse(content) +export async function sortYamlFile(filepath) { + const data = YAML.parse((await readFile(filepath)).toString()); - const sorted = Object.fromEntries( - Object.entries( - data + const sorted = Object.fromEntries( + Object.entries( + data, - // Sort keys - ).sort( - ([_a], [_b]) => { - const [a, b] = [_a, _b].map(downcase) + // Sort keys + ) + .sort( + ([_a], [_b]) => { + const [a, b] = [_a, _b].map((i) => i.toLowerCase()); - return a > b ? 1 : a < b ? -1 : 0 - } + return a > b ? 1 : a < b ? -1 : 0; + }, - // Remove duplicates and sort lists - ).map( - ([k, v]) => [k, dedup(v).sort(sort)] - ) - ) + // Remove duplicates and sort lists + ) + .map(([k, v]) => [k, Array.from(new Set(v)).sort(sort)]), + ); - await writeFile( - filepath, - YAML.stringify(sorted, undefined, { lineWidth: Infinity }) - ) + await writeFile( + filepath, + YAML.stringify(sorted, undefined, { lineWidth: Infinity }), + ); } diff --git a/scripts/symlink/index.js b/scripts/symlink/index.js deleted file mode 100755 index 35aefc8f..00000000 --- a/scripts/symlink/index.js +++ /dev/null @@ -1,33 +0,0 @@ -#!/usr/bin/env node - -const { promises: { mkdir, symlink } } = require('fs') -const { join } = require('path') -const exists = require('../lib/exists') -const ROOT = join(__dirname, '..', '..') - -start(ROOT, 'esm', 'cjs', 'browser') - -async function start (ROOT, ...directories) { - await Promise.all( - directories.map( - dir => async () => { - const modules = join(ROOT, 'tests', dir, 'node_modules') - const destination = join(modules, 'isbot') - if (await exists(destination)) { - return - } - await mkdir( - modules, { recursive: true } - ) - try { - await symlink(ROOT, destination, 'dir') - } catch (error) { - if (error.code === 'EEXIST') { - return - } - throw error - } - } - ).map(fn => fn()) - ) -} diff --git a/src/amend/index.js b/src/amend/index.js deleted file mode 100644 index d06ff226..00000000 --- a/src/amend/index.js +++ /dev/null @@ -1,36 +0,0 @@ -/** - * Mutate given list of patter strings - * @param {string[]} list - * @returns {string[]} - */ -export function amend (list) { - try { - // Risk: Uses lookbehind assertion, avoid breakage in parsing by using RegExp constructor - new RegExp('(? { - const index = list.lastIndexOf(search) - if (~index) { - list.splice(index, 1, replace) - } - } - ) - - return list -} diff --git a/src/browser.ts b/src/browser.ts new file mode 100644 index 00000000..94cdfa6a --- /dev/null +++ b/src/browser.ts @@ -0,0 +1,3 @@ +import { isbot } from "."; + +export default isbot; diff --git a/src/index.js b/src/index.js deleted file mode 100644 index 4c4f0f5f..00000000 --- a/src/index.js +++ /dev/null @@ -1,5 +0,0 @@ -import { Isbot } from './isbot/index.js' - -const isbot = new Isbot() - -export default isbot diff --git a/src/index.ts b/src/index.ts new file mode 100644 index 00000000..34a820cb --- /dev/null +++ b/src/index.ts @@ -0,0 +1,56 @@ +import { regex } from "./pattern"; +import patternsList from "./patterns.json"; + +// Workaround for TypeScript's type definition of imported variables and JSON files. + +/** + * A pattern that matches bot identifiers in user agent strings. + */ +const pattern: RegExp = regex; + +/** + * A list of bot identifiers to be used in a regular expression against user agent strings. + */ +const list: string[] = patternsList; + +/** + * Check if the given user agent includes a bot pattern. + */ +export const isbot = (userAgent: string): boolean => + Boolean(userAgent) && pattern.test(userAgent); + +/** + * Create a custom isbot function with a custom pattern. + */ +export const createIsbot = + (customPattern: RegExp) => + (userAgent: string): boolean => + Boolean(userAgent) && customPattern.test(userAgent); + +/** + * Find the first part of the user agent that matches a bot pattern. + */ +export const isbotMatch = (userAgent: string): string | null => + userAgent.match(pattern)?.[0]; + +/** + * Find all parts of the user agent that match a bot pattern. + */ +export const isbotMatches = (userAgent: string): string[] => + list + .map((part) => userAgent.match(new RegExp(part, "i"))?.[0]) + .filter(Boolean); + +/** + * Find the first bot patterns that match the given user agent. + */ +export const isbotPattern = (userAgent: string): string | null => + list.find((patten) => new RegExp(patten, "i").test(userAgent)) ?? null; + +/** + * Find all bot patterns that match the given user agent. + */ +export const isbotPatterns = (userAgent: string): string[] => + list.filter((patten) => new RegExp(patten, "i").test(userAgent)); + +export { pattern, list }; diff --git a/src/isbot/index.js b/src/isbot/index.js deleted file mode 100644 index 6c9cc914..00000000 --- a/src/isbot/index.js +++ /dev/null @@ -1,158 +0,0 @@ -import list from '../list.json' assert { type: 'json' } -import { amend } from '../amend/index.js' - -amend(list) - -const flags = 'i' - -/** - * Test user agents for matching patterns - */ -export class Isbot { - /** - * @type {string[]} - */ - #list - - /** - * @type {RegExp} - */ - #pattern - - constructor (patterns) { - this.#list = patterns || list.slice() - this.#update() - - const isbot = ua => this.test(ua) - - return Object.defineProperties( - isbot, - Object.entries(Object.getOwnPropertyDescriptors(Isbot.prototype)).reduce( - (accumulator, [prop, descriptor]) => { - if (typeof descriptor.value === 'function') { - Object.assign( - accumulator, - { [prop]: { value: this[prop].bind(this) } } - ) - } - if (typeof descriptor.get === 'function') { - Object.assign( - accumulator, - { [prop]: { get: () => this[prop] } } - ) - } - return accumulator - }, - {} - ) - ) - } - - /** - * Recreate the pattern from rules list - */ - #update () { - this.#pattern = new RegExp( - this.#list.join('|'), - flags - ) - } - - /** - * Find the first index of an existing rule or -1 if not found - * @param {string} rule - * @returns {number} - */ - #index (rule) { - return this.#list.indexOf(rule.toLowerCase()) - } - - /** - * Get a clone of the pattern - * @type RegExp - */ - get pattern () { - return new RegExp(this.#pattern) - } - - /** - * Match given string against out pattern - * @param {string} ua User Agent string - * @returns {boolean} - */ - test (ua) { - return Boolean(ua) && this.#pattern.test(ua) - } - - /** - * Get the match for strings' known crawler pattern - * @param {string} ua User Agent string - * @returns {string|null} - */ - find (ua = '') { - const match = ua.match(this.#pattern) - return match && match[0] - } - - /** - * Get the patterns that match user agent string if any - * @param {string} ua User Agent string - * @returns {string[]} - */ - matches (ua = '') { - return this.#list.filter( - entry => new RegExp(entry, flags).test(ua) - ) - } - - /** - * Clear all patterns that match user agent - * @param {string} ua User Agent string - * @returns {void} - */ - clear (ua = '') { - this.exclude(this.matches(ua)) - } - - /** - * Extent patterns for known crawlers - * @param {string[]} filters - * @returns {void} - */ - extend (filters = []) { - [].push.apply( - this.#list, - filters.filter( - rule => this.#index(rule) === -1 - ).map( - filter => filter.toLowerCase() - ) - ) - this.#update() - } - - /** - * Exclude patterns from bot pattern rule - * @param {string[]} filters - * @returns {void} - */ - exclude (filters = []) { - let { length } = filters - while (length--) { - const index = this.#index(filters[length]) - if (index > -1) { - this.#list.splice(index, 1) - } - } - this.#update() - } - - /** - * Create a new Isbot instance using given list or self's list - * @param {string[]} [list] - * @returns {Isbot} - */ - spawn (list) { - return new Isbot(list || this.#list) - } -} diff --git a/src/list.json b/src/patterns.json similarity index 93% rename from src/list.json rename to src/patterns.json index 08d619a8..1086123b 100644 --- a/src/list.json +++ b/src/patterns.json @@ -4,6 +4,10 @@ " yadirectfetcher", "(?:^| )site", "(?:^|[^g])news", + "(? { - const { LOG_INFO: logLevel } = config - config.set({ - browsers: ['Chrome', CI ? undefined : 'Firefox'].filter(Boolean), - frameworks: ['mocha'], - port: 9876, - logLevel, - singleRun: true, - concurrency: 1, - hooks: [ - 'karma-chrome-launcher', - CI ? undefined : 'karma-firefox-launcher', - 'karma-mocha', - 'karma-mocha-reporter' - ].filter(Boolean), - reporters: ['mocha'], - basePath: __dirname, - files: [file] - }) -} diff --git a/tests/browser/rollup.js b/tests/browser/rollup.js deleted file mode 100644 index 1c4efbf9..00000000 --- a/tests/browser/rollup.js +++ /dev/null @@ -1,14 +0,0 @@ -const { join } = require('path') -const { nodeResolve } = require('@rollup/plugin-node-resolve') - -module.exports = { - input: join(__dirname, 'spec.js'), - output: { - file: join(__dirname, 'dist.js'), - format: 'iife', - name: 'isbot' - }, - plugins: [ - nodeResolve({ browser: true }) - ] -} diff --git a/tests/browser/spec.js b/tests/browser/spec.js deleted file mode 100644 index 80c6d66f..00000000 --- a/tests/browser/spec.js +++ /dev/null @@ -1,21 +0,0 @@ -/* eslint-env mocha */ - -import isbot from 'isbot' - -const BROWSER_UA = 'Mozilla/5.0 (Macintosh; U; Intel Mac OS X; en-US) AppleWebKit/533.4 (KHTML, like Gecko) Chrome/5.0.375.86 Safari/533.4' -const CRAWLER_UA = 'Mozilla/3.0 (compatible; Web Link Validator 2.x)Web Link Validator http://www.relsoftware.com/ link validation software' - -describe('browser', () => { - describe('Sanity test in browser', () => { - it('should return false for a browser', () => { - if (isbot(BROWSER_UA) !== false) { - throw new Error(`Sould have passed browser "${BROWSER_UA}"`) - } - }) - it('should return true for a known crawler', () => { - if (isbot(CRAWLER_UA) !== true) { - throw new Error(`Sould have failed crawler "${CRAWLER_UA}"`) - } - }) - }) -}) diff --git a/tests/cjs/package.json b/tests/cjs/package.json deleted file mode 100644 index 5bbefffb..00000000 --- a/tests/cjs/package.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "type": "commonjs" -} diff --git a/tests/cjs/spec.js b/tests/cjs/spec.js deleted file mode 100644 index b0c07f57..00000000 --- a/tests/cjs/spec.js +++ /dev/null @@ -1,19 +0,0 @@ -/* eslint-env mocha */ - -const { strictEqual } = require('assert') -const isbot = require('isbot') - -describe( - 'cjs', - () => [ - ['Mozilla/5.0 (Windows; rv:81.0) Gecko/20100101 Firefox/81.0', false], - ['Mozilla/5.0 (Windows; rv:81.0) Gecko/20100101 Unkown/81.0', false], - ['Unknown Tool/1.0', true], - ['Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; Googlebot/2.1; +http://www.google.com/bot.html) Chrome/84.0.4147.108 Safari/537.36', true] - ].forEach( - ([ua, result]) => it( - `should return ${result} for ${ua}`, - () => strictEqual(isbot(ua), result) - ) - ) -) diff --git a/tests/efficiency/spec.js b/tests/efficiency/spec.js deleted file mode 100644 index 4e406005..00000000 --- a/tests/efficiency/spec.js +++ /dev/null @@ -1,81 +0,0 @@ -/* eslint-env mocha */ - -import { strict } from 'assert' -import wait from '@lets/wait' -import list from '../../src/list.json' assert { type: 'json' } -import fixtures from '../../fixtures/index.json' assert { type: 'json' } -import stdline from 'stdline' - -const { fail } = strict -const { crawlers = [] } = fixtures -const { update, end } = stdline -Object.freeze(list) - -const clone = () => list.slice() - -describe('efficiency', () => { - describe(`All rules are needed. Check each one against ${crawlers.length} user agent strings`, () => { - it('should find no unneeded rules', async function () { - this.timeout(30000) - - const redundantRules = [] - - let { length } = list - while (--length) { - update(`Check rule ${list.length - length}/${list.length}`) - const temp = clone() - const [rule] = temp.splice(length, 1) - const pattern = new RegExp(temp.join('|'), 'i') - const isbot = ua => pattern.test(ua) - const unmatched = crawlers.filter(isbot) - - if (crawlers.length - unmatched.length === 0) { - redundantRules.push(rule) - } - if (length % 50 === 0) { - global.gc() - await wait() - } - } - end() - redundantRules.length && fail([ - `Found ${redundantRules.length} redundant rules`, - ...redundantRules - ].join('\n')) - }) - }) - describe(`Rules can not be prefixed with a hat. Check each one against ${crawlers.length} user agent strings`, () => { - it('should not be missing a hat', async function () { - this.timeout(30000) - - const rulesWithNoHat = [] - - let { length } = list - while (--length) { - update(`Check rule ${list.length - length}/${list.length}`) - const temp = clone() - const [rule] = temp.splice(length, 1) - if (rule.startsWith('^')) { - continue - } - temp.push(`^${rule}`) - const pattern = new RegExp(temp.join('|'), 'i') - const isbot = ua => pattern.test(ua) - const unmatched = crawlers.filter(isbot) - - if (unmatched.length === crawlers.length) { - rulesWithNoHat.push(rule) - } - if (length % 50 === 0) { - global.gc() - await wait() - } - } - end() - rulesWithNoHat.length && fail([ - `Found ${rulesWithNoHat.length} rules with no hats`, - ...rulesWithNoHat.map(rule => `Replace '${rule}' with '^${rule}'`) - ].join('\n')) - }) - }) -}) diff --git a/tests/efficiency/test.ts b/tests/efficiency/test.ts new file mode 100644 index 00000000..df6f7e8c --- /dev/null +++ b/tests/efficiency/test.ts @@ -0,0 +1,95 @@ +import { isbot } from "../../src"; +import list from "../../src/patterns.json"; +import { browsers, crawlers } from "../../fixtures"; +import stdline from "stdline"; + +const wait = (): Promise => + new Promise((resolve) => setTimeout(resolve, 0)); +const TIMEOUT = 60000; + +const { update, end } = stdline; +Object.freeze(list); + +const clone = (): string[] => list.slice(); + +describe("efficiency", () => { + describe(`Redundant rules: no rule can be removed. Check each one against ${crawlers.length} user agent strings`, () => { + test( + "unneeded rules", + async function () { + jest.setTimeout(60000); + const redundantRules: string[] = []; + + let { length } = list; + while (--length) { + update(`Check rule ${list.length - length}/${list.length}`); + const temp = clone(); + const [rule] = temp.splice(length, 1); + const pattern = new RegExp(temp.join("|"), "i"); + const isbot = (ua) => pattern.test(ua); + const unmatched = crawlers.filter(isbot); + + if (crawlers.length - unmatched.length === 0) { + redundantRules.push(rule); + } + if (length % 50 === 0) { + global.gc?.(); + await wait(); + } + } + end(); + if (redundantRules.length) { + throw new Error( + [ + `Found ${redundantRules.length} redundant rules`, + ...redundantRules, + ].join("\n"), + ); + } + }, + TIMEOUT, + ); + }); + describe(`Pattern efficiency: Rules can not be prefixed with a hat. Check each one against ${crawlers.length} user agent strings`, () => { + test( + "Some items should have a hat", + async function () { + const rulesWithNoHat: string[] = []; + + let { length } = list; + while (--length) { + update(`Check rule ${list.length - length}/${list.length}`); + const temp = clone(); + const [rule] = temp.splice(length, 1); + if (rule.startsWith("^")) { + continue; + } + temp.push(`^${rule}`); + const pattern = new RegExp(temp.join("|"), "i"); + const isbot = (ua) => pattern.test(ua); + const unmatched = crawlers.filter(isbot); + + if (unmatched.length === crawlers.length) { + rulesWithNoHat.push(rule); + } + if (length % 50 === 0) { + global.gc?.(); + await wait(); + } + } + end(); + if (rulesWithNoHat.length) { + throw new Error( + [ + `Found ${rulesWithNoHat.length} rules with no hats`, + ...rulesWithNoHat.map( + (rule) => `Replace '${rule}' with '^${rule}'`, + ), + ].join("\n"), + ); + } + }, + TIMEOUT, + ); + }); +}); diff --git a/tests/esm/package.json b/tests/esm/package.json deleted file mode 100644 index 3dbc1ca5..00000000 --- a/tests/esm/package.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "type": "module" -} diff --git a/tests/esm/spec.js b/tests/esm/spec.js deleted file mode 100644 index 53048c10..00000000 --- a/tests/esm/spec.js +++ /dev/null @@ -1,19 +0,0 @@ -/* eslint-env mocha */ - -import { strictEqual } from 'assert' -import isbot from 'isbot' - -describe( - 'esm', - () => [ - ['Mozilla/5.0 (Windows; rv:81.0) Gecko/20100101 Firefox/81.0', false], - ['Mozilla/5.0 (Windows; rv:81.0) Gecko/20100101 Unkown/81.0', false], - ['Unknown Tool/1.0', true], - ['Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; Googlebot/2.1; +http://www.google.com/bot.html) Chrome/84.0.4147.108 Safari/537.36', true] - ].forEach( - ([ua, result]) => it( - `should return ${result} for ${ua}`, - () => strictEqual(isbot(ua), result) - ) - ) -) diff --git a/tests/jest/.npmrc b/tests/jest/.npmrc deleted file mode 100644 index 43c97e71..00000000 --- a/tests/jest/.npmrc +++ /dev/null @@ -1 +0,0 @@ -package-lock=false diff --git a/tests/jest/babel.config.js b/tests/jest/babel.config.js deleted file mode 100644 index 7b976f52..00000000 --- a/tests/jest/babel.config.js +++ /dev/null @@ -1,6 +0,0 @@ -module.exports = { - presets: [ - ['@babel/preset-env', { targets: { node: 'current' } }], - '@babel/preset-typescript' - ] -} diff --git a/tests/jest/cjs/test.js b/tests/jest/cjs/test.js deleted file mode 100644 index 1c56b07d..00000000 --- a/tests/jest/cjs/test.js +++ /dev/null @@ -1,9 +0,0 @@ -/* eslint-env jest */ - -const isbot = require('isbot') - -describe('jest test', () => { - test('should pass', () => { - expect(isbot('Pingdom.com_bot_version_1.4_(http://www.pingdom.com/)')).toBeTruthy() - }) -}) diff --git a/tests/jest/esm/test.js b/tests/jest/esm/test.js deleted file mode 100644 index 761ceec7..00000000 --- a/tests/jest/esm/test.js +++ /dev/null @@ -1,9 +0,0 @@ -/* eslint-env jest */ - -import isbot from 'isbot' - -describe('jest test', () => { - test('should pass', () => { - expect(isbot('Pingdom.com_bot_version_1.4_(http://www.pingdom.com/)')).toBeTruthy() - }) -}) diff --git a/tests/jest/package.json b/tests/jest/package.json deleted file mode 100644 index 8f1fcc57..00000000 --- a/tests/jest/package.json +++ /dev/null @@ -1,24 +0,0 @@ -{ - "type": "commonjs", - "scripts": { - "browser": "jest esm --env='jsdom'", - "cjs": "jest cjs", - "esm": "jest esm", - "pretest": "which jest || npm i", - "test": "npm run esm && npm run cjs && npm run browser && npm run ts", - "ts": "jest ts" - }, - "dependencies": { - "isbot": "../.." - }, - "devDependencies": { - "@babel/core": "^7.17.10", - "@babel/preset-env": "^7.17.10", - "@babel/preset-typescript": "^7.16.7", - "@types/jest": "^27.5.0", - "babel-jest": "^28.1.0", - "jest": "^28.1.0", - "jest-environment-jsdom": "^28.1.0", - "ts-jest": "^28.0.2" - } -} diff --git a/tests/jest/ts/test.ts b/tests/jest/ts/test.ts deleted file mode 100644 index 3ff1a4cc..00000000 --- a/tests/jest/ts/test.ts +++ /dev/null @@ -1,10 +0,0 @@ -/* eslint-env jest */ - -import isbot from 'isbot' -import { strict as assert } from 'assert' - -describe('jest test', (): void => { - test('should pass', (): void => { - assert(isbot('Pingdom.com_bot_version_1.4_(http://www.pingdom.com/)')) - }) -}) diff --git a/tests/efficiency/package.json b/tests/package.json similarity index 100% rename from tests/efficiency/package.json rename to tests/package.json diff --git a/tests/spec/test.ts b/tests/spec/test.ts new file mode 100644 index 00000000..95851e62 --- /dev/null +++ b/tests/spec/test.ts @@ -0,0 +1,73 @@ +import { + isbot, + isbotMatch, + isbotMatches, + isbotPattern, + isbotPatterns, + createIsbot, +} from "../../src"; +import { crawlers, browsers } from "../../fixtures"; + +const BOT_USER_AGENT_EXAMPLE = + "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)"; +const BROWSER_USER_AGENT_EXAMPLE = + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91 Safari/537.36"; + +describe("isbot", () => { + describe("features", () => { + test("isbot: bot user agect string is recognised as bot", () => { + expect(isbot(BOT_USER_AGENT_EXAMPLE)).toBe(true); + }); + test("isbotMatch: find pattern in bot user agent string", () => { + expect(isbotMatch(BOT_USER_AGENT_EXAMPLE)).toBe("Google"); + }); + test("isbotMatches: find all patterns in bot user agent string", () => { + expect(isbotMatches(BOT_USER_AGENT_EXAMPLE)).toContain("Google"); + expect(isbotMatches(BOT_USER_AGENT_EXAMPLE)).toHaveLength(3); + }); + test("isbotPattern: find first pattern in bot user agent string", () => { + expect(isbotPattern(BOT_USER_AGENT_EXAMPLE)).toBe( + "(? { + expect(isbotPatterns(BOT_USER_AGENT_EXAMPLE)).toContain( + "(? { + const customIsbot = createIsbot(/bot/i); + expect(customIsbot(BOT_USER_AGENT_EXAMPLE)).toBe(true); + }); + }); + + describe("fixtures", () => { + test(`βœ”οΈŽ ${crawlers.length} user agent string should be recognised as crawler`, () => { + let successCount = 0; + let misidentifiedStrings: string[] = []; + crawlers.forEach((crawler) => { + if (isbot(crawler)) { + successCount++; + } else { + misidentifiedStrings.push(crawler); + } + }); + expect(misidentifiedStrings).toEqual([]); + expect(successCount).toBe(crawlers.length); + }); + test(`✘ ${browsers.length} user agent string should not be recognised as crawler`, () => { + let successCount = 0; + let misidentifiedStrings: string[] = []; + browsers.forEach((browser) => { + if (isbot(browser)) { + misidentifiedStrings.push(browser); + } else { + successCount++; + } + }); + expect(misidentifiedStrings).toEqual([]); + expect(successCount).toBe(browsers.length); + }); + }); +}); diff --git a/tests/specs/package.json b/tests/specs/package.json deleted file mode 100644 index 3dbc1ca5..00000000 --- a/tests/specs/package.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "type": "module" -} diff --git a/tests/specs/spec.js b/tests/specs/spec.js deleted file mode 100644 index cdbcf473..00000000 --- a/tests/specs/spec.js +++ /dev/null @@ -1,158 +0,0 @@ -/* eslint-env mocha */ - -import { strict as assert } from 'assert' -import isbot from '../../src/index.js' -import fixtures from '../../fixtures/index.json' assert { type: 'json' } - -const { browsers = [], crawlers = [] } = fixtures - -const { equal, fail, notEqual } = assert -let spawn - -describe( - 'specs', - () => { - beforeEach(() => { - spawn = isbot.spawn() - }) - - it('should not break with empty input', () => { - equal(spawn(), false) - }) - - it(`should return false for all ${browsers.length} browsers`, () => { - const recognised = browsers.filter(spawn) - - recognised.length && fail([ - `Recognised as bots ${recognised.length} user agents:`, - ...recognised.map(item => ` - ${item}`) - ].join('\n')) - }) - - it(`should return true for all ${crawlers.length} crawlers`, () => { - const unrecognised = crawlers.filter(ua => !spawn(ua)) - unrecognised.length && fail([ - `Unrecognised as bots ${unrecognised.length} user agents:`, - ...unrecognised.map(item => ` - ${item}`) - ].join('\n')) - }) - - describe('spawn.extend', () => { - const useragent = 'Mozilla/5.0 (Linux) Randomagent/93.0' - const rule = 'randomagent/\\d+\\.\\d+' - - it(`should not detect "${rule}" as bot`, () => { - assert(!spawn(useragent)) - }) - - it(`should detect "${rule}" as bot`, () => { - spawn.extend([rule]) - assert(spawn(useragent)) - }) - - it('should not extend an existing item', () => { - spawn.extend([rule]) - spawn.extend([rule]) - spawn.extend([rule]) - spawn.exclude([rule]) - assert(!spawn(useragent)) - }) - }) - - describe('spawn.exclude', () => { - const useragent = 'Mozilla/5.0 (Macintosh; intel mac os x 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.175 Safari/537.36 Chrome-Lighthouse' - const rule = 'chrome-lighthouse' - - it(`should detect "${rule}" as bot`, () => { - assert(spawn(useragent)) - }) - - it(`should not detect "${rule}" as bot`, () => { - spawn.exclude([rule]) - assert(!spawn(useragent)) - }) - - it('should remain silent when excluding non existing filter', () => { - spawn.exclude(['something']) - }) - }) - - describe('spawn.find', () => { - it('should not break with empty input', () => { - equal(spawn.find(), null) - }) - - it('should return null for non bot browser', () => { - equal(spawn.find('Mozilla/5.0 (Linux) Firefox/93.0'), null) - }) - - it('should return the rule used to identify as bot', () => { - equal(spawn.find('Mozilla/5.0 (compatible; SemrushBot-SA/0.97; +http://www.semrush.com/bot.html)'), 'Bot') - }) - - it('should be able to remove match using find', () => { - const ua = 'Mozilla/5.0 (Linux; Android 10; SNE-LX1 Build/HUAWEISNE-L21; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/94.0.4606.71 Mobile Safari/537.36 hap/1079/huawei com.huawei.fastapp/11.4.1.310 com.frvr.worlds.quickapp/4.0.17 ({"packageName":"quickSearch","type":"other","extra":"{}"})' - equal(spawn(ua), true) - spawn.exclude(spawn.matches(ua)) - equal(spawn(ua), false) - }) - }) - - describe('spawn.clear', () => { - it('should clear all rules relevant to a user agent string', () => { - const ua = 'Mozilla/5.0 (Linux; Android 10; SNE-LX1 Build/HUAWEISNE-L21; wv) AppleWebKit/537.36 (KHTML, like Gecko) Spider/1.0 Robot/1.0 Search/1.0 Chrome/94.0.4606.71' - equal(spawn(ua), true) - spawn.clear(ua) - equal(spawn(ua), false) - }) - - it('should clear the pattern', () => { - equal(spawn('Chrome-Lighthouse'), true) - spawn.clear(['chrome-lighthouse']) - equal(spawn('Chrome-Lighthouse'), false) - }) - }) - - describe('spawn.spawn', () => { - it('should spawn isbot with its own list', () => { - const newUA = 'Mozilla/5.0 (Linux) NeoBrowser/93.0' - const botUA = 'Mozilla/5.0 (compatible; SemrushBot-SA/0.97; +http://www.semrush.com/bot.html)' - const spawn2 = spawn.spawn(['neobrowser']) - assert(!spawn(newUA)) - assert(spawn2(newUA)) - assert(spawn(botUA)) - assert(!spawn2(botUA)) - }) - it('should not affect each others lists', () => { - const newUA = 'Mozilla/5.0 (Linux) NeoBrowser/93.0' - const spawn1 = spawn.spawn() - const spawn2 = spawn.spawn() - spawn1.extend(['neobrowser']) - assert(spawn1(newUA)) - assert(!spawn2(newUA)) - }) - it('should spawn from instance\'s list', () => { - const newUA = 'Mozilla/5.0 (Linux) NeoBrowser/93.0' - const spawn1 = spawn.spawn() - spawn1.extend(['neobrowser']) - const spawn2 = spawn1.spawn() - assert(spawn2(newUA)) - }) - }) - - describe('spawn.pattern', () => { - it('should expose the regular expression', () => { - assert(spawn.pattern instanceof RegExp) - - const { pattern: re1 } = spawn - const { pattern: re2 } = spawn - - notEqual(re1, re2) - equal(re1.toString(), re2.toString()) - - re2.compile('something') - notEqual(re1.toString(), re2.toString()) - }) - }) - } -) diff --git a/tsconfig.json b/tsconfig.json index d01684ab..2eb8a0ad 100644 --- a/tsconfig.json +++ b/tsconfig.json @@ -1,17 +1,15 @@ { "compilerOptions": { - "target": "esnext", + "lib": ["esnext"], "module": "esnext", "moduleResolution": "node", - "allowJs": true, - "checkJs": true, - "noEmit": true, - "strict": false, - "noImplicitThis": true, - "alwaysStrict": true, + "target": "esnext", + "types": ["@types/jest", "@types/node"], + "allowSyntheticDefaultImports": true, "esModuleInterop": true, + "noEmit": true, "resolveJsonModule": true }, - "include": ["index.d.ts"], - "exclude": [] + "include": ["src"], + "exclude": ["node_modules"] }