Skip to content

Commit

Permalink
Knowledge optimization (langgenius#3755)
Browse files Browse the repository at this point in the history
Co-authored-by: crazywoola <[email protected]>
Co-authored-by: JzoNg <[email protected]>
  • Loading branch information
3 people authored Apr 24, 2024
1 parent 3cd8e6f commit f257f2c
Show file tree
Hide file tree
Showing 75 changed files with 2,754 additions and 264 deletions.
3 changes: 3 additions & 0 deletions api/controllers/console/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,5 +53,8 @@
workflow,
)

# Import tag controllers
from .tag import tags

# Import workspace controllers
from .workspace import account, members, model_providers, models, tool_providers, workspace
23 changes: 19 additions & 4 deletions api/controllers/console/app/app.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,25 @@
import json
import uuid

from flask_login import current_user
from flask_restful import Resource, inputs, marshal_with, reqparse
from werkzeug.exceptions import BadRequest, Forbidden
from flask_restful import Resource, inputs, marshal, marshal_with, reqparse
from werkzeug.exceptions import BadRequest, Forbidden, abort

from controllers.console import api
from controllers.console.app.wraps import get_app_model
from controllers.console.setup import setup_required
from controllers.console.wraps import account_initialization_required, cloud_edition_billing_resource_check
from core.tools.tool_manager import ToolManager
from core.tools.utils.configuration import ToolParameterConfigurationManager
from fields.app_fields import (
app_detail_fields,
app_detail_fields_with_site,
app_pagination_fields,
)
from libs.login import login_required
from models.model import App, AppMode, AppModelConfig
from services.app_service import AppService
from services.tag_service import TagService

ALLOW_CREATE_APP_MODES = ['chat', 'agent-chat', 'advanced-chat', 'workflow', 'completion']

Expand All @@ -22,21 +29,29 @@ class AppListApi(Resource):
@setup_required
@login_required
@account_initialization_required
@marshal_with(app_pagination_fields)
def get(self):
"""Get app list"""
def uuid_list(value):
try:
return [str(uuid.UUID(v)) for v in value.split(',')]
except ValueError:
abort(400, message="Invalid UUID format in tag_ids.")
parser = reqparse.RequestParser()
parser.add_argument('page', type=inputs.int_range(1, 99999), required=False, default=1, location='args')
parser.add_argument('limit', type=inputs.int_range(1, 100), required=False, default=20, location='args')
parser.add_argument('mode', type=str, choices=['chat', 'workflow', 'agent-chat', 'channel', 'all'], default='all', location='args', required=False)
parser.add_argument('name', type=str, location='args', required=False)
parser.add_argument('tag_ids', type=uuid_list, location='args', required=False)

args = parser.parse_args()

# get app list
app_service = AppService()
app_pagination = app_service.get_paginate_apps(current_user.current_tenant_id, args)
if not app_pagination:
return {'data': [], 'total': 0, 'page': 1, 'limit': 20, 'has_more': False}

return app_pagination
return marshal(app_pagination, app_pagination_fields)

@setup_required
@login_required
Expand Down
26 changes: 25 additions & 1 deletion api/controllers/console/datasets/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,11 +48,14 @@ def get(self):
limit = request.args.get('limit', default=20, type=int)
ids = request.args.getlist('ids')
provider = request.args.get('provider', default="vendor")
search = request.args.get('keyword', default=None, type=str)
tag_ids = request.args.getlist('tag_ids')

if ids:
datasets, total = DatasetService.get_datasets_by_ids(ids, current_user.current_tenant_id)
else:
datasets, total = DatasetService.get_datasets(page, limit, provider,
current_user.current_tenant_id, current_user)
current_user.current_tenant_id, current_user, search, tag_ids)

# check embedding setting
provider_manager = ProviderManager()
Expand Down Expand Up @@ -184,6 +187,10 @@ def patch(self, dataset_id):
help='Invalid indexing technique.')
parser.add_argument('permission', type=str, location='json', choices=(
'only_me', 'all_team_members'), help='Invalid permission.')
parser.add_argument('embedding_model', type=str,
location='json', help='Invalid embedding model.')
parser.add_argument('embedding_model_provider', type=str,
location='json', help='Invalid embedding model provider.')
parser.add_argument('retrieval_model', type=dict, location='json', help='Invalid retrieval model.')
args = parser.parse_args()

Expand Down Expand Up @@ -506,10 +513,27 @@ def get(self, vector_type):
else:
raise ValueError("Unsupported vector db type.")

class DatasetErrorDocs(Resource):
@setup_required
@login_required
@account_initialization_required
def get(self, dataset_id):
dataset_id_str = str(dataset_id)
dataset = DatasetService.get_dataset(dataset_id_str)
if dataset is None:
raise NotFound("Dataset not found.")
results = DocumentService.get_error_documents_by_dataset_id(dataset_id_str)

return {
'data': [marshal(item, document_status_fields) for item in results],
'total': len(results)
}, 200


api.add_resource(DatasetListApi, '/datasets')
api.add_resource(DatasetApi, '/datasets/<uuid:dataset_id>')
api.add_resource(DatasetQueryApi, '/datasets/<uuid:dataset_id>/queries')
api.add_resource(DatasetErrorDocs, '/datasets/<uuid:dataset_id>/error-docs')
api.add_resource(DatasetIndexingEstimateApi, '/datasets/indexing-estimate')
api.add_resource(DatasetRelatedAppListApi, '/datasets/<uuid:dataset_id>/related-apps')
api.add_resource(DatasetIndexingStatusApi, '/datasets/<uuid:dataset_id>/indexing-status')
Expand Down
47 changes: 46 additions & 1 deletion api/controllers/console/datasets/datasets_document.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import logging
from datetime import datetime, timezone

from flask import request
Expand Down Expand Up @@ -233,7 +234,7 @@ def post(self, dataset_id):
location='json')
parser.add_argument('data_source', type=dict, required=False, location='json')
parser.add_argument('process_rule', type=dict, required=False, location='json')
parser.add_argument('duplicate', type=bool, nullable=False, location='json')
parser.add_argument('duplicate', type=bool, default=True, nullable=False, location='json')
parser.add_argument('original_document_id', type=str, required=False, location='json')
parser.add_argument('doc_form', type=str, default='text_model', required=False, nullable=False, location='json')
parser.add_argument('doc_language', type=str, default='English', required=False, nullable=False,
Expand Down Expand Up @@ -883,6 +884,49 @@ def patch(self, dataset_id, document_id):
return {'result': 'success'}, 204


class DocumentRetryApi(DocumentResource):
@setup_required
@login_required
@account_initialization_required
def post(self, dataset_id):
"""retry document."""

parser = reqparse.RequestParser()
parser.add_argument('document_ids', type=list, required=True, nullable=False,
location='json')
args = parser.parse_args()
dataset_id = str(dataset_id)
dataset = DatasetService.get_dataset(dataset_id)
retry_documents = []
if not dataset:
raise NotFound('Dataset not found.')
for document_id in args['document_ids']:
try:
document_id = str(document_id)

document = DocumentService.get_document(dataset.id, document_id)

# 404 if document not found
if document is None:
raise NotFound("Document Not Exists.")

# 403 if document is archived
if DocumentService.check_archived(document):
raise ArchivedDocumentImmutableError()

# 400 if document is completed
if document.indexing_status == 'completed':
raise DocumentAlreadyFinishedError()
retry_documents.append(document)
except Exception as e:
logging.error(f"Document {document_id} retry failed: {str(e)}")
continue
# retry document
DocumentService.retry_document(dataset_id, retry_documents)

return {'result': 'success'}, 204


api.add_resource(GetProcessRuleApi, '/datasets/process-rule')
api.add_resource(DatasetDocumentListApi,
'/datasets/<uuid:dataset_id>/documents')
Expand All @@ -908,3 +952,4 @@ def patch(self, dataset_id, document_id):
'/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/status/<string:action>')
api.add_resource(DocumentPauseApi, '/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/processing/pause')
api.add_resource(DocumentRecoverApi, '/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/processing/resume')
api.add_resource(DocumentRetryApi, '/datasets/<uuid:dataset_id>/retry')
159 changes: 159 additions & 0 deletions api/controllers/console/tag/tags.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
from flask import request
from flask_login import current_user
from flask_restful import Resource, marshal_with, reqparse
from werkzeug.exceptions import Forbidden

from controllers.console import api
from controllers.console.setup import setup_required
from controllers.console.wraps import account_initialization_required
from fields.tag_fields import tag_fields
from libs.login import login_required
from models.model import Tag
from services.tag_service import TagService


def _validate_name(name):
if not name or len(name) < 1 or len(name) > 40:
raise ValueError('Name must be between 1 to 50 characters.')
return name


class TagListApi(Resource):

@setup_required
@login_required
@account_initialization_required
@marshal_with(tag_fields)
def get(self):
tag_type = request.args.get('type', type=str)
keyword = request.args.get('keyword', default=None, type=str)
tags = TagService.get_tags(tag_type, current_user.current_tenant_id, keyword)

return tags, 200

@setup_required
@login_required
@account_initialization_required
def post(self):
# The role of the current user in the ta table must be admin or owner
if not current_user.is_admin_or_owner:
raise Forbidden()

parser = reqparse.RequestParser()
parser.add_argument('name', nullable=False, required=True,
help='Name must be between 1 to 50 characters.',
type=_validate_name)
parser.add_argument('type', type=str, location='json',
choices=Tag.TAG_TYPE_LIST,
nullable=True,
help='Invalid tag type.')
args = parser.parse_args()
tag = TagService.save_tags(args)

response = {
'id': tag.id,
'name': tag.name,
'type': tag.type,
'binding_count': 0
}

return response, 200


class TagUpdateDeleteApi(Resource):

@setup_required
@login_required
@account_initialization_required
def patch(self, tag_id):
tag_id = str(tag_id)
# The role of the current user in the ta table must be admin or owner
if not current_user.is_admin_or_owner:
raise Forbidden()

parser = reqparse.RequestParser()
parser.add_argument('name', nullable=False, required=True,
help='Name must be between 1 to 50 characters.',
type=_validate_name)
args = parser.parse_args()
tag = TagService.update_tags(args, tag_id)

binding_count = TagService.get_tag_binding_count(tag_id)

response = {
'id': tag.id,
'name': tag.name,
'type': tag.type,
'binding_count': binding_count
}

return response, 200

@setup_required
@login_required
@account_initialization_required
def delete(self, tag_id):
tag_id = str(tag_id)
# The role of the current user in the ta table must be admin or owner
if not current_user.is_admin_or_owner:
raise Forbidden()

TagService.delete_tag(tag_id)

return 200


class TagBindingCreateApi(Resource):

@setup_required
@login_required
@account_initialization_required
def post(self):
# The role of the current user in the ta table must be admin or owner
if not current_user.is_admin_or_owner:
raise Forbidden()

parser = reqparse.RequestParser()
parser.add_argument('tag_ids', type=list, nullable=False, required=True, location='json',
help='Tag IDs is required.')
parser.add_argument('target_id', type=str, nullable=False, required=True, location='json',
help='Target ID is required.')
parser.add_argument('type', type=str, location='json',
choices=Tag.TAG_TYPE_LIST,
nullable=True,
help='Invalid tag type.')
args = parser.parse_args()
TagService.save_tag_binding(args)

return 200


class TagBindingDeleteApi(Resource):

@setup_required
@login_required
@account_initialization_required
def post(self):
# The role of the current user in the ta table must be admin or owner
if not current_user.is_admin_or_owner:
raise Forbidden()

parser = reqparse.RequestParser()
parser.add_argument('tag_id', type=str, nullable=False, required=True,
help='Tag ID is required.')
parser.add_argument('target_id', type=str, nullable=False, required=True,
help='Target ID is required.')
parser.add_argument('type', type=str, location='json',
choices=Tag.TAG_TYPE_LIST,
nullable=True,
help='Invalid tag type.')
args = parser.parse_args()
TagService.delete_tag_binding(args)

return 200


api.add_resource(TagListApi, '/tags')
api.add_resource(TagUpdateDeleteApi, '/tags/<uuid:tag_id>')
api.add_resource(TagBindingCreateApi, '/tag-bindings/create')
api.add_resource(TagBindingDeleteApi, '/tag-bindings/remove')
5 changes: 4 additions & 1 deletion api/controllers/service_api/dataset/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,11 @@ def get(self, tenant_id):
page = request.args.get('page', default=1, type=int)
limit = request.args.get('limit', default=20, type=int)
provider = request.args.get('provider', default="vendor")
search = request.args.get('keyword', default=None, type=str)
tag_ids = request.args.getlist('tag_ids')

datasets, total = DatasetService.get_datasets(page, limit, provider,
tenant_id, current_user)
tenant_id, current_user, search, tag_ids)
# check embedding setting
provider_manager = ProviderManager()
configurations = provider_manager.get_configurations(
Expand Down
Loading

0 comments on commit f257f2c

Please sign in to comment.