diff --git a/doc/doc_ch/algorithm_rec_latex_ocr.md b/doc/doc_ch/algorithm_rec_latex_ocr.md
index 91a9f0ca2a..9acc861828 100644
--- a/doc/doc_ch/algorithm_rec_latex_ocr.md
+++ b/doc/doc_ch/algorithm_rec_latex_ocr.md
@@ -33,6 +33,11 @@
## 2. Environment Configuration
Please first refer to ["Environment Preparation"](./environment.md) to configure the PaddleOCR environment, and refer to ["Project Clone"](./clone.md) to clone the project code.
+In addition, extra dependencies need to be installed:
+```shell
+pip install "tokenizers==0.19.1" "imagesize"
+```
+
## 3. 模型训练、评估、预测
diff --git a/doc/doc_en/algorithm_rec_latex_ocr_en.md b/doc/doc_en/algorithm_rec_latex_ocr_en.md
index 087d40145a..fcb8863d30 100644
--- a/doc/doc_en/algorithm_rec_latex_ocr_en.md
+++ b/doc/doc_en/algorithm_rec_latex_ocr_en.md
@@ -31,6 +31,10 @@ Using LaTeX-OCR printed mathematical expression recognition datasets for trainin
## 2. Environment
Please refer to ["Environment Preparation"](./environment_en.md) to configure the PaddleOCR environment, and refer to ["Project Clone"](./clone_en.md) to clone the project code.
+Furthermore, additional dependencies need to be installed:
+```shell
+pip install "tokenizers==0.19.1" "imagesize"
+```
## 3. Model Training / Evaluation / Prediction
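For readers applying this doc change, a minimal sanity check, a sketch assuming the pinned versions from the pip command above, confirms that both now-optional dependencies resolve:

```python
# Quick sanity check that the optional LaTeX-OCR dependencies resolve after
# running the pip command above; the version pin is the one from that command.
import imagesize
import tokenizers

print("tokenizers version:", tokenizers.__version__)  # expected: 0.19.1
print("imagesize loaded from:", imagesize.__file__)
```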
diff --git a/ppocr/data/imaug/label_ops.py b/ppocr/data/imaug/label_ops.py
index 430e8ef80b..7d4afec4e3 100644
--- a/ppocr/data/imaug/label_ops.py
+++ b/ppocr/data/imaug/label_ops.py
@@ -26,7 +26,6 @@
import random
from random import sample
from collections import defaultdict
-from tokenizers import Tokenizer as TokenizerFast
from ppocr.utils.logging import get_logger
from ppocr.data.imaug.vqa.augment import order_by_tbyx
@@ -1780,6 +1779,8 @@ def __init__(
rec_char_dict_path,
**kwargs,
):
+ from tokenizers import Tokenizer as TokenizerFast
+
self.tokenizer = TokenizerFast.from_file(rec_char_dict_path)
self.model_input_names = ["input_ids", "token_type_ids", "attention_mask"]
self.pad_token_id = 0
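The hunk above moves the `tokenizers` import from module scope into `__init__`, so importing `ppocr.data.imaug.label_ops` no longer requires the optional package. A minimal sketch of that deferred-import pattern follows; the class name `LazyLatexEncoder` is illustrative, not a PaddleOCR class:

```python
# Sketch of the deferred-import pattern introduced above; `LazyLatexEncoder`
# is a hypothetical class, not part of PaddleOCR.
class LazyLatexEncoder:
    def __init__(self, rec_char_dict_path, **kwargs):
        # Import only when an encoder is instantiated, so importing the module
        # itself does not require the optional `tokenizers` package.
        from tokenizers import Tokenizer as TokenizerFast

        self.tokenizer = TokenizerFast.from_file(rec_char_dict_path)
        self.pad_token_id = 0

    def __call__(self, latex_text):
        # Encode a LaTeX string into token ids with the loaded fast tokenizer.
        return self.tokenizer.encode(latex_text).ids
```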
diff --git a/ppocr/postprocess/rec_postprocess.py b/ppocr/postprocess/rec_postprocess.py
index a81d62f4d8..3902c3f92d 100644
--- a/ppocr/postprocess/rec_postprocess.py
+++ b/ppocr/postprocess/rec_postprocess.py
@@ -15,7 +15,6 @@
import numpy as np
import paddle
from paddle.nn import functional as F
-from tokenizers import Tokenizer as TokenizerFast
import re
@@ -1217,6 +1216,8 @@ class LaTeXOCRDecode(object):
"""Convert between latex-symbol and symbol-index"""
def __init__(self, rec_char_dict_path, **kwargs):
+ from tokenizers import Tokenizer as TokenizerFast
+
super(LaTeXOCRDecode, self).__init__()
self.tokenizer = TokenizerFast.from_file(rec_char_dict_path)
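The same deferral is applied in `LaTeXOCRDecode`. For context on the `tokenizers` fast-tokenizer API the decoder relies on, here is a small round-trip sketch; `latex_tokenizer.json` is a placeholder path, not a file guaranteed by this patch:

```python
# Round-trip sketch of the `tokenizers` fast-tokenizer API used by
# LaTeXOCRDecode; "latex_tokenizer.json" is a placeholder path.
from tokenizers import Tokenizer as TokenizerFast

tokenizer = TokenizerFast.from_file("latex_tokenizer.json")

ids = tokenizer.encode(r"\frac{a}{b}").ids               # LaTeX -> token ids
latex = tokenizer.decode(ids, skip_special_tokens=True)  # token ids -> LaTeX
print(latex)
```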
diff --git a/ppocr/utils/formula_utils/math_txt2pkl.py b/ppocr/utils/formula_utils/math_txt2pkl.py
index 748fcb1ba1..e7ddcb5d44 100644
--- a/ppocr/utils/formula_utils/math_txt2pkl.py
+++ b/ppocr/utils/formula_utils/math_txt2pkl.py
@@ -15,8 +15,7 @@
import pickle
from tqdm import tqdm
import os
-import cv2
-import imagesize
+from paddle.utils import try_import
from collections import defaultdict
import glob
from os.path import join
@@ -24,6 +23,7 @@
def txt2pickle(images, equations, save_dir):
+ imagesize = try_import("imagesize")
save_p = os.path.join(save_dir, "latexocr_{}.pkl".format(images.split("/")[-1]))
min_dimensions = (32, 32)
max_dimensions = (672, 192)
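`math_txt2pkl.py` now resolves `imagesize` through `paddle.utils.try_import`, which raises an import error with an install hint only when the conversion actually runs. A condensed sketch of that usage follows; the helper `keep_image` is hypothetical, while the dimension bounds mirror the values in the hunk:

```python
# Sketch of the try_import + imagesize usage above; `keep_image` is a
# hypothetical helper, not a function defined in math_txt2pkl.py.
from paddle.utils import try_import


def keep_image(image_path, min_dimensions=(32, 32), max_dimensions=(672, 192)):
    # try_import resolves the optional dependency at call time and raises an
    # ImportError with an install hint if `imagesize` is not installed.
    imagesize = try_import("imagesize")
    # imagesize.get reads only the image header and returns (width, height).
    width, height = imagesize.get(image_path)
    return (
        min_dimensions[0] <= width <= max_dimensions[0]
        and min_dimensions[1] <= height <= max_dimensions[1]
    )
```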
diff --git a/requirements.txt b/requirements.txt
index 40afd21d6e..61a6022de1 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -13,5 +13,3 @@ Pillow
pyyaml
requests
albumentations==1.4.10
-tokenizers==0.19.1
-imagesize