0.3 #180

Merged · 75 commits · Jun 9, 2024

Changes from all commits (75 commits)
5a3563c
update env
chengchingwen Dec 6, 2023
0087371
using linked list for tokenization internal container
chengchingwen Dec 6, 2023
7a7251d
adapt huggingface bert tokenizer behavior
chengchingwen Dec 6, 2023
0832910
update util func
chengchingwen Dec 6, 2023
3825ef8
unifying text encoders
chengchingwen Dec 6, 2023
d6ca63c
huggingface extract tokenizer decoder
chengchingwen Dec 6, 2023
49f8e74
huggingface validation with TimerOutputs
chengchingwen Dec 6, 2023
53f1cf6
huggingface validate tokenizer decode
chengchingwen Dec 6, 2023
7206b89
update t5 example
chengchingwen Dec 13, 2023
8b0cdc6
update llama2 example
chengchingwen Dec 14, 2023
ba841e4
add lru cache for unigram tokenization
chengchingwen Dec 17, 2023
710be24
unigram use cache with abstract string keytype
chengchingwen Dec 18, 2023
23e50bc
use func normalizer for adding prefix
chengchingwen Dec 18, 2023
5461ab0
update bpe compat
chengchingwen Dec 18, 2023
b56aead
improve unigram tokenization perf: inbounds & replace vector with lin…
chengchingwen Dec 18, 2023
3d95348
fix cached unigram tokenization
chengchingwen Dec 18, 2023
4884d38
use pythoncall to test huggingface tokenizer
chengchingwen Dec 20, 2023
212f39c
test more tokenizer
chengchingwen Dec 20, 2023
7af6990
remove unused import
chengchingwen Dec 20, 2023
023432b
add ci env var for testing hgf tokenizer
chengchingwen Dec 20, 2023
c2076d7
fix skip test
chengchingwen Dec 20, 2023
a9bcc53
use secret hfhub token
chengchingwen Dec 20, 2023
297e9c3
Merge pull request #161 from chengchingwen/tkr
chengchingwen Dec 20, 2023
5ab5e72
add constructors and docstring
chengchingwen Dec 21, 2023
e9440ca
refine import/export
chengchingwen Dec 21, 2023
20881bc
trf encoder overload TEB.process
chengchingwen Dec 21, 2023
a9eb83c
rework hgf config: new internal interface & support field name alias
chengchingwen Dec 24, 2023
537bbac
remove eval in hgf cfg macro
chengchingwen Dec 24, 2023
34a0f63
fix hgfcfg constructor
chengchingwen Dec 25, 2023
1d67582
rework hgf model: simplify model definition with new internal interface
chengchingwen Dec 25, 2023
668b993
add space between comma
chengchingwen Dec 25, 2023
37e731b
update docs: add dev doc for adding new hgf model
chengchingwen Dec 25, 2023
9bdd060
Merge pull request #162 from chengchingwen/hgf
chengchingwen Jan 7, 2024
3cddaa2
hand-written attention seems to work
pevnak Jan 27, 2024
d3d0b59
support llama with pe base
chengchingwen Jan 27, 2024
568124f
finish phi impl
chengchingwen Jan 27, 2024
4f154a3
handle embed decoder bias with get_state_dict
chengchingwen Jan 27, 2024
16f3e16
commenting out phi load/save test due to lack safetensor support
chengchingwen Jan 27, 2024
b9963bd
added loader of safetensors
pevnak Jan 30, 2024
70c6d84
added safetensors through a library and phi test uncommented
pevnak Feb 13, 2024
fb3c1fc
changed the loading of safetensors and added force_format
pevnak Feb 16, 2024
82a5912
Update example/GPT2_TextGeneration/text_generation.jl
pevnak Feb 17, 2024
070de6b
Update src/huggingface/weight.jl
pevnak Feb 17, 2024
30cb442
Update src/huggingface/download.jl
pevnak Feb 17, 2024
302d109
Update src/huggingface/weight.jl
pevnak Feb 17, 2024
67c5d72
Update src/huggingface/weight.jl
pevnak Feb 17, 2024
2a1d935
update weight loader
chengchingwen May 7, 2024
e3ae219
update env
chengchingwen May 7, 2024
550f3eb
fix typo
chengchingwen May 7, 2024
2f396ae
use pkg extension for gpu
chengchingwen May 8, 2024
73f0ec4
fix typo
chengchingwen May 8, 2024
984e71a
use custom walk and cache for type stability
chengchingwen May 8, 2024
27c067b
add togpudevice
chengchingwen May 8, 2024
952f35b
update doc
chengchingwen May 8, 2024
3bb4a58
Merge branch 'ext' into phi
chengchingwen May 9, 2024
9dc967f
fix docs
chengchingwen May 9, 2024
e495414
Merge branch 'ext' into 0.3
chengchingwen May 14, 2024
80be79f
remove NNlibCUDA in test deps
chengchingwen May 14, 2024
5d765ab
Merge remote-tracking branch 'pevnak/phi' into 0.3
chengchingwen May 15, 2024
f475ffe
update phi & test
chengchingwen May 15, 2024
40b0233
update nalib
chengchingwen Jun 4, 2024
b20df08
fix hgfcfg
chengchingwen Jun 4, 2024
48bd21d
ce w/ int
chengchingwen Jun 8, 2024
33cd1fc
update env
chengchingwen Jun 8, 2024
8cb050c
fix hgf cfg keys
chengchingwen Jun 8, 2024
a888e44
test hgf tkr w/ trunc & fix roberta tkr
chengchingwen Jun 9, 2024
4b6aac4
fix vararg warn
chengchingwen Jun 9, 2024
48b9ef2
fix hgf tkr & update test
chengchingwen Jun 9, 2024
f50413b
unify task model naming
chengchingwen Jun 9, 2024
6d161fc
Parallel with multiple model
chengchingwen Jun 9, 2024
c6213f6
refine code
chengchingwen Jun 9, 2024
b339872
fix deleted include
chengchingwen Jun 9, 2024
dca1399
fix docs
chengchingwen Jun 9, 2024
9fefeb3
revert remove t5 hgf tkr path
chengchingwen Jun 9, 2024
ca53cc4
update ci
chengchingwen Jun 9, 2024
7 changes: 7 additions & 0 deletions .github/dependabot.yml
@@ -0,0 +1,7 @@
+# https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates
+version: 2
+updates:
+  - package-ecosystem: "github-actions"
+    directory: "/" # Location of package manifests
+    schedule:
+      interval: "weekly"
20 changes: 5 additions & 15 deletions .github/workflows/CompatHelper.yml
@@ -1,26 +1,16 @@
 name: CompatHelper
 on:
   schedule:
-    - cron: '00 * * * *'
-  issues:
-    types: [opened, reopened]
+    - cron: 0 0 * * *
+  workflow_dispatch:
 jobs:
-  build:
-    runs-on: ${{ matrix.os }}
-    strategy:
-      matrix:
-        julia-version: [1.5.1]
-        julia-arch: [x86]
-        os: [ubuntu-latest]
+  CompatHelper:
+    runs-on: ubuntu-latest
     steps:
-      - uses: julia-actions/setup-julia@latest
-        with:
-          version: ${{ matrix.julia-version }}
       - name: Pkg.add("CompatHelper")
         run: julia -e 'using Pkg; Pkg.add("CompatHelper")'
       - name: CompatHelper.main()
         env:
           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          COMPATHELPER_PRIV: ${{ secrets.DOCUMENTER_KEY }}
         run: julia -e 'using CompatHelper; CompatHelper.main()'
21 changes: 19 additions & 2 deletions .github/workflows/TagBot.yml
@@ -1,9 +1,25 @@
 name: TagBot
 on:
-  issue_comment:
+  issue_comment:
     types:
       - created
-  workflow_dispatch:
+  workflow_dispatch:
+    inputs:
+      lookback:
+        default: "3"
+permissions:
+  actions: read
+  checks: read
+  contents: write
+  deployments: read
+  issues: read
+  discussions: read
+  packages: read
+  pages: read
+  pull-requests: read
+  repository-projects: read
+  security-events: read
+  statuses: read
 jobs:
   TagBot:
     if: github.event_name == 'workflow_dispatch' || github.actor == 'JuliaTagBot'
@@ -12,3 +28,4 @@ jobs:
     - uses: JuliaRegistries/TagBot@v1
       with:
         token: ${{ secrets.GITHUB_TOKEN }}
+        ssh: ${{ secrets.DOCUMENTER_KEY }}
23 changes: 0 additions & 23 deletions .github/workflows/ci-pretrain.yml

This file was deleted.

84 changes: 50 additions & 34 deletions .github/workflows/ci.yml
@@ -1,36 +1,43 @@
 name: CI
 on:
   push:
     branches:
       - master
     tags: '*'
   pull_request:
-defaults:
-  run:
-    shell: bash
+  workflow_dispatch:
+concurrency:
+  # Skip intermediate builds: always.
+  # Cancel intermediate builds: only if it is a pull request build.
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: ${{ startsWith(github.ref, 'refs/pull/') }}
 jobs:
   test:
-    name: Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.julia-threads }} thread(s) - ${{ github.event_name }}
+    name: Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.julia-threads }} thread(s) - ${{ github.event_name }}
     runs-on: ${{ matrix.os }}
+    permissions: # needed to allow julia-actions/cache to proactively delete old caches that it has created
+      actions: write
+      contents: read
     env:
       JULIA_NUM_THREADS: ${{ matrix.julia-threads }}
+      JL_TRF_TEST_TKR: ${{ matrix.test-hgf-tkr }}
+      HUGGINGFACEHUB_TOKEN: ${{ secrets.HUGGINGFACEHUB_TOKEN }}
     strategy:
       fail-fast: false
       matrix:
         version:
-          - '1.6' # Replace this with the minimum Julia version that your package supports.
-          - '1' # automatically expands to the latest stable 1.x release of Julia
+          - '1.10' # Replace this with the minimum Julia version that your package supports.
+          - '1.11'
+          # - '1' # automatically expands to the latest stable 1.x release of Julia
+          # - 'nightly'
         os:
           - ubuntu-latest
         arch:
           - x64
         julia-threads:
           - '1'
+        test-hgf-tkr:
+          - 'false'
         include:
           - os: windows-latest
             version: '1'
@@ -44,40 +51,49 @@ jobs:
             version: '1'
             arch: x64
             julia-threads: '2'
+            test-hgf-tkr: 'true'
     steps:
-      - uses: actions/checkout@v2
-      - uses: julia-actions/setup-julia@v1
+      - uses: actions/checkout@v4
+      - uses: julia-actions/install-juliaup@v2
         with:
-          version: ${{ matrix.version }}
-          arch: ${{ matrix.arch }}
-      - uses: actions/cache@v1
-        env:
-          cache-name: cache-artifacts
-        with:
-          path: ~/.julia/artifacts
-          key: ${{ runner.os }}-test-${{ env.cache-name }}-${{ hashFiles('**/Project.toml') }}
-          restore-keys: |
-            ${{ runner.os }}-test-${{ env.cache-name }}-
-            ${{ runner.os }}-test-
-            ${{ runner.os }}-
-      - run: julia --project -e 'using Pkg; Pkg.instantiate(); Pkg.build(); Pkg.test(; coverage=true)';
+          channel: ${{ matrix.version }}
+      - uses: julia-actions/cache@v2
+      - uses: julia-actions/julia-buildpkg@v1
+      - uses: julia-actions/julia-runtest@v1
       - uses: julia-actions/julia-processcoverage@v1
-      - uses: codecov/codecov-action@v1
+      - uses: codecov/codecov-action@v4
         with:
-          file: lcov.info
+          files: lcov.info
+          token: ${{ secrets.CODECOV_TOKEN }}
+          fail_ci_if_error: false
   docs:
     name: Documentation
     runs-on: ubuntu-latest
+    permissions:
+      actions: write # needed to allow julia-actions/cache to proactively delete old caches that it has created
+      contents: write
+      statuses: write
     steps:
-      - uses: actions/checkout@v2
-      - uses: julia-actions/setup-julia@v1
+      - uses: actions/checkout@v4
+      - uses: julia-actions/setup-julia@v2
         with:
           version: '1'
-      - run: julia --project=docs/ -e 'using Pkg; Pkg.develop(PackageSpec(path=pwd()));
-          Pkg.instantiate()'
-      - run: julia --project=docs docs/make.jl
+      - uses: julia-actions/cache@v2
+      - name: Configure doc environment
+        shell: julia --project=docs --color=yes {0}
+        run: |
+          using Pkg
+          Pkg.develop(PackageSpec(path=pwd()))
+          Pkg.instantiate()
+      - uses: julia-actions/julia-buildpkg@v1
+      - uses: julia-actions/julia-docdeploy@v1
        env:
           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          DOCUMENTER_KEY: ${{ secrets.DOCUMENTER_KEY }}
+      - name: Run doctests
+        shell: julia --project=docs --color=yes {0}
+        run: |
+          using Documenter: DocMeta, doctest
+          using Transformers
+          DocMeta.setdocmeta!(Transformers, :DocTestSetup, :(using Transformers); recursive=true)
+          doctest(Transformers)
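Note: the rewritten docs job runs Julia directly as the step shell; GitHub Actions writes each `run:` block to a temporary script and substitutes its path for `{0}` in `shell: julia --project=docs --color=yes {0}`. The doctest step can be reproduced locally with the same code; a minimal sketch, assuming it is run from the repository root with the `docs/` environment instantiated:

```julia
# Same code as the CI "Run doctests" step, runnable from a local REPL
# started with `julia --project=docs` at the repository root (assumption).
using Documenter: DocMeta, doctest
using Transformers

# Inject `using Transformers` as the setup for every doctest, then run them all.
DocMeta.setdocmeta!(Transformers, :DocTestSetup, :(using Transformers); recursive=true)
doctest(Transformers)
```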
45 changes: 31 additions & 14 deletions Project.toml
@@ -1,12 +1,12 @@
 name = "Transformers"
 uuid = "21ca0261-441d-5938-ace7-c90938fde4d4"
 authors = ["chengchingwen <[email protected]>"]
-version = "0.2.8"
+version = "0.3.0"

 [deps]
+BangBang = "198e06fe-97b7-11e9-32a5-e1d131e6ad66"
 Base64 = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f"
 BytePairEncoding = "a4280ba5-8788-555a-8ca8-4a8c3d966a71"
-CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
 ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
 DataDeps = "124859b0-ceae-595e-8997-d05f6a7a8dfe"
 DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8"
@@ -20,65 +20,82 @@ FuncPipelines = "9ed96fbb-10b6-44d4-99a6-7e2a3dc8861b"
 Functors = "d9f16b24-f501-4c13-a1f2-28368ffc5196"
 HTTP = "cd3eb016-35fb-5094-929b-558a96fad6f3"
 HuggingFaceApi = "3cc741c3-0c9d-4fbe-84fa-cdec264173de"
 InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
 JSON3 = "0f8b85d8-7281-11e9-16c2-39a750bddbf1"
+LRUCache = "8ac3fa9e-de4c-5943-b1dc-09c6b5f20637"
 LightXML = "9c8b4983-aa76-5018-a973-4c85ecc9e179"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 Mmap = "a63ad114-7e13-5084-954f-fe012c677804"
 NNlib = "872c559c-99b0-510c-b3b7-b6c96a88d5cd"
-NNlibCUDA = "a00861dc-f156-4864-bf3c-e6376f28a68d"
 NeuralAttentionlib = "12afc1b8-fad6-47e1-9132-84abc478905f"
 Pickle = "fbb45041-c46e-462f-888f-7c521cafbc2c"
 Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
 PrimitiveOneHot = "13d12f88-f12b-451e-9b9f-13b97e01cc85"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 SHA = "ea8e919c-243c-51af-8825-aaa63cd721ce"
+SafeTensors = "eeda0dda-7046-4914-a807-2495fc7abb89"
 Static = "aedffcd0-7271-4cad-89d0-dc628f76c6d3"
 Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
+StringViews = "354b36f9-a18e-4713-926e-db85100087ba"
 StructWalk = "31cdf514-beb7-4750-89db-dda9d2eb8d3d"
 TextEncodeBase = "f92c20c0-9f2a-4705-8116-881385faba05"
+Tricks = "410a4b4d-49e4-4fbc-ab6d-cb71b17b3775"
 Unicode = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5"
 ValSplit = "0625e100-946b-11ec-09cd-6328dd093154"
 WordTokenizers = "796a5d58-b03d-544a-977e-18100b691f6e"
 Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"

+[weakdeps]
+AMDGPU = "21141c5a-9bdb-4563-92ae-f87d6854732e"
+CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
+Metal = "dde4c033-4e86-420c-a63e-0dd931031962"
+
+[extensions]
+TransformersAMDGPUExt = "AMDGPU"
+TransformersCUDAExt = "CUDA"
+TransformersMetalExt = "Metal"

 [compat]
-BytePairEncoding = "0.4"
-CUDA = "3.10, 4, 5"
+AMDGPU = "0.9"
+BangBang = "0.3"
+BytePairEncoding = "0.5.1"
+CUDA = "5"
 ChainRulesCore = "1.15"
 DataDeps = "0.7"
 DataStructures = "0.18"
 DoubleArrayTries = "0.1"
 Fetch = "0.1.3"
 FillArrays = "0.13, 1"
-Flux = "0.13.4"
+Flux = "0.14"
 FuncPipelines = "0.2.3"
 Functors = "0.2, 0.3, 0.4"
 HTTP = "0.9, 1"
 HuggingFaceApi = "0.1"
 JSON3 = "1.12"
+LRUCache = "1.5"
 LightXML = "0.9"
-NNlib = "0.8"
-NNlibCUDA = "0.2"
-NeuralAttentionlib = "0.2.12"
-Pickle = "0.3"
-PrimitiveOneHot = "0.1"
+Metal = "1.1"
+NNlib = "0.9"
+NeuralAttentionlib = "0.3"
+Pickle = "0.3.5"
+PrimitiveOneHot = "0.2"
+SafeTensors = "1.1.1"
 Static = "0.7, 0.8"
+StringViews = "1"
 StructWalk = "0.2"
-TextEncodeBase = "0.7"
+TextEncodeBase = "0.8.1"
+Tricks = "0.1.8"
 ValSplit = "0.1"
 WordTokenizers = "0.5.6"
 Zygote = "0.6.59"
-julia = "1.6"
+julia = "1.10"

 [extras]
+CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
 ChainRulesTestUtils = "cdddcdb0-9152-4a09-a978-84456f9df70a"
 Logging = "56ddb016-857b-54e1-b83d-db4d58db5568"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 ZipFile = "a5390f91-8eb1-5f08-bee0-b1d1ffed6cea"

 [targets]
-test = ["Test", "Logging", "ZipFile", "ChainRulesTestUtils"]
+test = ["Test", "Logging", "ZipFile", "CUDA", "ChainRulesTestUtils"]
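Note: this Project.toml change moves the GPU packages out of the hard dependencies into `[weakdeps]`, so GPU support now ships as package extensions (`TransformersCUDAExt` and friends) that load only when the trigger package is also loaded. A minimal sketch of the resulting behavior, assuming both Transformers and CUDA are installed in the active environment:

```julia
# Package extensions (Julia >= 1.9): the CUDA-specific code in Transformers
# loads only once the weak dependency itself is loaded.
using Transformers
@show Base.get_extension(Transformers, :TransformersCUDAExt)  # nothing yet

using CUDA  # loading the trigger package auto-loads the extension
@show Base.get_extension(Transformers, :TransformersCUDAExt)  # now a Module
```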
1 change: 1 addition & 0 deletions docs/Project.toml
@@ -1,2 +1,3 @@
 [deps]
 Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
+Transformers = "21ca0261-441d-5938-ace7-c90938fde4d4"
6 changes: 5 additions & 1 deletion docs/make.jl
@@ -19,7 +19,11 @@ makedocs(;
         "Tutorial" => "tutorial.md",
         "Layers" => "layers.md",
         "TextEncoders" => "textencoders.md",
-        "HuggingFace" => "huggingface.md",
+        "HuggingFace" => [
+            "User Interface" => "huggingface.md",
+            "Add New Models" => "huggingface_dev.md",
+        ],
+        "API Reference" => "api_ref.md",
         "ChangeLogs" => "changelog.md",
     ],
 )
8 changes: 8 additions & 0 deletions docs/src/api_ref.md
@@ -0,0 +1,8 @@
+# API Reference
+
+```@index
+```
+
+```@autodocs
+Modules = [Transformers]
+```
12 changes: 8 additions & 4 deletions docs/src/getstarted.md
@@ -192,12 +192,16 @@ julia> bert_model(encode(bertenc, "Peter Piper picked a peck of pickled peppers"

 ## GPU

-Transformers relies on `CUDA.jl` for the GPU stuffs. In `Flux` we normally use `Flux.gpu` to convert model or data to
-the device. In Transformers, we provide another 2 api (`enable_gpu` and `todevice`) for this. If `enable_gpu(true)` is
-set, `todevice` will be moving data to GPU device, otherwise it is copying data on CPU. *notice*: `enable_gpu` should
-only be called in script, it cannot be used during precompilation.
+Transformers relies on `CUDA.jl` (or `AMDGPU.jl`/`Metal.jl`) for the GPU stuffs.
+In `Flux` we normally use `Flux.gpu` to convert model or data to the device.
+In Transformers, we provide another 2 api (`enable_gpu` and `todevice`) for this.
+If `enable_gpu(true)` is set, `todevice` will be moving data to GPU device, otherwise it is copying data on CPU.
+The backend is selected by `Flux.gpu_backend!`. When calling `enable_gpu()`, corresponding GPU package (e.g. `CUDA.jl`)
+will be loaded (equivalent to `using CUDA` in REPL), which requires GPU packages to be installed in the environment.
+*notice*: `enable_gpu` should only be called in script, it cannot be used during precompilation.

 ```@docs
 enable_gpu
 todevice
+Transformers.togpudevice
 ```
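For reference, a minimal usage sketch matching the revised paragraph (assuming CUDA.jl is installed, and reusing the `bertenc`/`bert_model` objects loaded earlier in the getting-started guide):

```julia
using Flux, Transformers
using Transformers.TextEncoders  # for `encode`

Flux.gpu_backend!("CUDA")  # choose the backend once; stored in LocalPreferences.toml
enable_gpu(true)           # loads CUDA.jl; call from a script, never during precompilation

model = todevice(bert_model)  # moves to GPU; with enable_gpu(false) this would stay a CPU copy
input = todevice(encode(bertenc, "Peter Piper picked a peck of pickled peppers"))
output = model(input)
```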