Skip to content

Commit

Permalink
Fix/upload limit (#2521)
Browse files: browse the repository at this point in the history
Co-authored-by: jyong <[email protected]>
Co-authored-by: StyleZhang <[email protected]>
  • Loading branch information
3 people authored Feb 22, 2024
1 parent 52b12ed commit 97fe817
Show file tree
Hide file tree
Showing 12 changed files with 97 additions and 14 deletions.
2 changes: 2 additions & 0 deletions api/.env.example
Original file line number Diff line number Diff line change
Expand Up @@ -130,3 +130,5 @@ UNSTRUCTURED_API_URL=

SSRF_PROXY_HTTP_URL=
SSRF_PROXY_HTTPS_URL=

BATCH_UPLOAD_LIMIT=10
3 changes: 3 additions & 0 deletions api/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@
'BILLING_ENABLED': 'False',
'CAN_REPLACE_LOGO': 'False',
'ETL_TYPE': 'dify',
'BATCH_UPLOAD_LIMIT': 20
}


Expand Down Expand Up @@ -285,6 +286,8 @@ def __init__(self):
self.BILLING_ENABLED = get_bool_env('BILLING_ENABLED')
self.CAN_REPLACE_LOGO = get_bool_env('CAN_REPLACE_LOGO')

self.BATCH_UPLOAD_LIMIT = get_env('BATCH_UPLOAD_LIMIT')


class CloudEditionConfig(Config):

Expand Down
17 changes: 17 additions & 0 deletions api/core/indexing_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
from models.dataset import Document as DatasetDocument
from models.model import UploadFile
from models.source import DataSourceBinding
from services.feature_service import FeatureService


class IndexingRunner:
Expand Down Expand Up @@ -244,6 +245,14 @@ def file_indexing_estimate(self, tenant_id: str, file_details: list[UploadFile],
"""
Estimate the indexing for the document.
"""
# check batch upload limit (enforced only when billing is enabled)
features = FeatureService.get_features(tenant_id)
if features.billing.enabled:
count = len(file_details)
batch_upload_limit = int(current_app.config['BATCH_UPLOAD_LIMIT'])
if count > batch_upload_limit:
raise ValueError(f"You have reached the batch upload limit of {batch_upload_limit}.")

embedding_model_instance = None
if dataset_id:
dataset = Dataset.query.filter_by(
Expand Down Expand Up @@ -361,6 +370,14 @@ def notion_indexing_estimate(self, tenant_id: str, notion_info_list: list, tmp_p
"""
Estimate the indexing for the document.
"""
# check batch upload limit (enforced only when billing is enabled)
features = FeatureService.get_features(tenant_id)
if features.billing.enabled:
count = len(notion_info_list)
batch_upload_limit = int(current_app.config['BATCH_UPLOAD_LIMIT'])
if count > batch_upload_limit:
raise ValueError(f"You have reached the batch upload limit of {batch_upload_limit}.")

embedding_model_instance = None
if dataset_id:
dataset = Dataset.query.filter_by(
Expand Down
7 changes: 7 additions & 0 deletions api/services/annotation_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from extensions.ext_database import db
from extensions.ext_redis import redis_client
from models.model import App, AppAnnotationHitHistory, AppAnnotationSetting, Message, MessageAnnotation
from services.feature_service import FeatureService
from tasks.annotation.add_annotation_to_index_task import add_annotation_to_index_task
from tasks.annotation.batch_import_annotations_task import batch_import_annotations_task
from tasks.annotation.delete_annotation_index_task import delete_annotation_index_task
Expand Down Expand Up @@ -284,6 +285,12 @@ def batch_import_app_annotations(cls, app_id, file: FileStorage) -> dict:
result.append(content)
if len(result) == 0:
raise ValueError("The CSV file is empty.")
# check annotation limit
features = FeatureService.get_features(current_user.current_tenant_id)
if features.billing.enabled:
annotation_quota_limit = features.annotation_quota_limit
if annotation_quota_limit.limit < len(result) + annotation_quota_limit.size:
raise ValueError("The number of annotations exceeds the limit of your subscription.")
# async job
job_id = str(uuid.uuid4())
indexing_cache_key = 'app_annotation_batch_import_{}'.format(str(job_id))
Expand Down
32 changes: 22 additions & 10 deletions api/services/dataset_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
from services.errors.dataset import DatasetNameDuplicateError
from services.errors.document import DocumentIndexingError
from services.errors.file import FileNotExistsError
from services.feature_service import FeatureService
from services.vector_service import VectorService
from tasks.clean_notion_document_task import clean_notion_document_task
from tasks.deal_dataset_vector_index_task import deal_dataset_vector_index_task
Expand Down Expand Up @@ -452,7 +453,9 @@ def save_document_with_dataset_id(dataset: Dataset, document_data: dict,
created_from: str = 'web'):

# check document limit
if current_app.config['EDITION'] == 'CLOUD':
features = FeatureService.get_features(current_user.current_tenant_id)

if features.billing.enabled:
if 'original_document_id' not in document_data or not document_data['original_document_id']:
count = 0
if document_data["data_source"]["type"] == "upload_file":
Expand All @@ -462,6 +465,9 @@ def save_document_with_dataset_id(dataset: Dataset, document_data: dict,
notion_info_list = document_data["data_source"]['info_list']['notion_info_list']
for notion_info in notion_info_list:
count = count + len(notion_info['pages'])
batch_upload_limit = int(current_app.config['BATCH_UPLOAD_LIMIT'])
if count > batch_upload_limit:
raise ValueError(f"You have reached the batch upload limit of {batch_upload_limit}.")
# if dataset is empty, update dataset data_source_type
if not dataset.data_source_type:
dataset.data_source_type = document_data["data_source"]["type"]
Expand Down Expand Up @@ -741,14 +747,20 @@ def update_document_with_dataset_id(dataset: Dataset, document_data: dict,

@staticmethod
def save_document_without_dataset_id(tenant_id: str, document_data: dict, account: Account):
count = 0
if document_data["data_source"]["type"] == "upload_file":
upload_file_list = document_data["data_source"]["info_list"]['file_info_list']['file_ids']
count = len(upload_file_list)
elif document_data["data_source"]["type"] == "notion_import":
notion_info_list = document_data["data_source"]['info_list']['notion_info_list']
for notion_info in notion_info_list:
count = count + len(notion_info['pages'])
features = FeatureService.get_features(current_user.current_tenant_id)

if features.billing.enabled:
count = 0
if document_data["data_source"]["type"] == "upload_file":
upload_file_list = document_data["data_source"]["info_list"]['file_info_list']['file_ids']
count = len(upload_file_list)
elif document_data["data_source"]["type"] == "notion_import":
notion_info_list = document_data["data_source"]['info_list']['notion_info_list']
for notion_info in notion_info_list:
count = count + len(notion_info['pages'])
batch_upload_limit = int(current_app.config['BATCH_UPLOAD_LIMIT'])
if count > batch_upload_limit:
raise ValueError(f"You have reached the batch upload limit of {batch_upload_limit}.")

embedding_model = None
dataset_collection_binding_id = None
Expand Down Expand Up @@ -1139,7 +1151,7 @@ def update_segment(cls, args: dict, segment: DocumentSegment, document: Document
segment.answer = args['answer']
if 'keywords' in args and args['keywords']:
segment.keywords = args['keywords']
if'enabled' in args and args['enabled'] is not None:
if 'enabled' in args and args['enabled'] is not None:
segment.enabled = args['enabled']
db.session.add(segment)
db.session.commit()
Expand Down
4 changes: 2 additions & 2 deletions api/services/file_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,9 @@
IMAGE_EXTENSIONS = ['jpg', 'jpeg', 'png', 'webp', 'gif', 'svg']
IMAGE_EXTENSIONS.extend([ext.upper() for ext in IMAGE_EXTENSIONS])

ALLOWED_EXTENSIONS = ['txt', 'markdown', 'md', 'pdf', 'html', 'htm', 'xlsx', 'docx', 'csv'] + IMAGE_EXTENSIONS
ALLOWED_EXTENSIONS = ['txt', 'markdown', 'md', 'pdf', 'html', 'htm', 'xlsx', 'docx', 'csv']
UNSTRUSTURED_ALLOWED_EXTENSIONS = ['txt', 'markdown', 'md', 'pdf', 'html', 'htm', 'xlsx',
'docx', 'csv', 'eml', 'msg', 'pptx', 'ppt', 'xml'] + IMAGE_EXTENSIONS
'docx', 'csv', 'eml', 'msg', 'pptx', 'ppt', 'xml']
PREVIEW_WORDS_LIMIT = 3000


Expand Down
33 changes: 32 additions & 1 deletion api/tasks/document_indexing_task.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,12 @@

import click
from celery import shared_task
from flask import current_app

from core.indexing_runner import DocumentIsPausedException, IndexingRunner
from extensions.ext_database import db
from models.dataset import Document
from models.dataset import Dataset, Document
from services.feature_service import FeatureService


@shared_task(queue='dataset')
Expand All @@ -21,6 +23,35 @@ def document_indexing_task(dataset_id: str, document_ids: list):
"""
documents = []
start_at = time.perf_counter()

dataset = db.session.query(Dataset).filter(Dataset.id == dataset_id).first()

# check batch upload limit and vector space quota (only when billing is enabled)
features = FeatureService.get_features(dataset.tenant_id)
try:
if features.billing.enabled:
vector_space = features.vector_space
count = len(document_ids)
batch_upload_limit = int(current_app.config['BATCH_UPLOAD_LIMIT'])
if count > batch_upload_limit:
raise ValueError(f"You have reached the batch upload limit of {batch_upload_limit}.")
if 0 < vector_space.limit <= vector_space.size:
raise ValueError("Your total number of documents plus the number of uploads have over the limit of "
"your subscription.")
except Exception as e:
for document_id in document_ids:
document = db.session.query(Document).filter(
Document.id == document_id,
Document.dataset_id == dataset_id
).first()
if document:
document.indexing_status = 'error'
document.error = str(e)
document.stopped_at = datetime.datetime.utcnow()
db.session.add(document)
db.session.commit()
return

for document_id in document_ids:
logging.info(click.style('Start process document: {}'.format(document_id), fg='green'))

Expand Down
9 changes: 8 additions & 1 deletion web/app/components/datasets/create/file-uploader/index.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ import { fetchSupportFileTypes } from '@/service/datasets'
import I18n from '@/context/i18n'
import { LanguagesSupportedUnderscore, getModelRuntimeSupported } from '@/utils/language'

const FILES_NUMBER_LIMIT = 20

type IFileUploaderProps = {
fileList: FileItem[]
titleClassName?: string
Expand Down Expand Up @@ -176,6 +178,11 @@ const FileUploader = ({
if (!files.length)
return false

if (files.length + fileList.length > FILES_NUMBER_LIMIT) {
notify({ type: 'error', message: t('datasetCreation.stepOne.uploader.validation.filesNumber', { filesNumber: FILES_NUMBER_LIMIT }) })
return false
}

const preparedFiles = files.map((file, index) => ({
fileID: `file${index}-${Date.now()}`,
file,
Expand All @@ -185,7 +192,7 @@ const FileUploader = ({
prepareFileList(newFiles)
fileListRef.current = newFiles
uploadMultipleFiles(preparedFiles)
}, [prepareFileList, uploadMultipleFiles])
}, [prepareFileList, uploadMultipleFiles, notify, t, fileList])

const handleDragEnter = (e: DragEvent) => {
e.preventDefault()
Expand Down
1 change: 1 addition & 0 deletions web/i18n/lang/dataset-creation.en.ts
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ const translation = {
typeError: 'File type not supported',
size: 'File too large. Maximum is {{size}}MB',
count: 'Multiple files not supported',
filesNumber: 'You have reached the batch upload limit of {{filesNumber}}.',
},
cancel: 'Cancel',
change: 'Change',
Expand Down
1 change: 1 addition & 0 deletions web/i18n/lang/dataset-creation.pt.ts
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ const translation = {
typeError: 'Tipo de arquivo não suportado',
size: 'Arquivo muito grande. Máximo é {{size}}MB',
count: 'Vários arquivos não suportados',
filesNumber: 'Limite de upload em massa {{filesNumber}}.',
},
cancel: 'Cancelar',
change: 'Alterar',
Expand Down
1 change: 1 addition & 0 deletions web/i18n/lang/dataset-creation.uk.ts
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ const translation = {
typeError: 'Тип файлу не підтримується',
size: 'Файл занадто великий. Максимум – {{size}} МБ',
count: 'Не підтримується завантаження кількох файлів',
filesNumber: 'Ліміт масового завантаження {{filesNumber}}.',
},
cancel: 'Скасувати',
change: 'Змінити',
Expand Down
1 change: 1 addition & 0 deletions web/i18n/lang/dataset-creation.zh.ts
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ const translation = {
typeError: '文件类型不支持',
size: '文件太大了,不能超过 {{size}}MB',
count: '暂不支持多个文件',
filesNumber: '批量上传限制 {{filesNumber}}。',
},
cancel: '取消',
change: '更改文件',
Expand Down

0 comments on commit 97fe817

Please sign in to comment.