perf: redirect request and err log replace (#768)

perf: dataset openapi

openapi
c121914yu authored Jan 23, 2024
1 parent 379673c commit 3181166
Showing 4 changed files with 210 additions and 5 deletions.
8 changes: 4 additions & 4 deletions docSite/content/docs/development/openapi/dataset.md
@@ -342,7 +342,7 @@ data is the collection ID.
{{< /tabs >}}


-### Create a plain text collection (commercial edition)
+### Create a plain text collection

Pass in a piece of text to create a collection; the text will be split into chunks automatically.

@@ -351,7 +351,7 @@ data is the collection ID.
{{< markdownify >}}

```bash
-curl --location --request POST 'http://localhost:3000/api/proApi/core/dataset/collection/create/text' \
+curl --location --request POST 'http://localhost:3000/api/core/dataset/collection/create/text' \
--header 'Authorization: Bearer {{authorization}}' \
--header 'Content-Type: application/json' \
--data-raw '{
@@ -418,7 +418,7 @@ data is the collection ID.
{{< /tab >}}
{{< /tabs >}}

-### Create a link collection (commercial edition)
+### Create a link collection

Pass in a web link to create a collection; the content of the target page is scraped first, and the scraped text is then split into chunks.

@@ -427,7 +427,7 @@ data is the collection ID.
{{< markdownify >}}

```bash
-curl --location --request POST 'http://localhost:3000/api/proApi/core/dataset/collection/create/link' \
+curl --location --request POST 'http://localhost:3000/api/core/dataset/collection/create/link' \
--header 'Authorization: Bearer {{authorization}}' \
--header 'Content-Type: application/json' \
--data-raw '{
88 changes: 88 additions & 0 deletions projects/app/src/pages/api/core/dataset/collection/create/link.ts
@@ -0,0 +1,88 @@
/*
  Create one dataset collection
*/
import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response';
import { connectToDatabase } from '@/service/mongo';
import type { LinkCreateDatasetCollectionParams } from '@fastgpt/global/core/dataset/api.d';
import { authDataset } from '@fastgpt/service/support/permission/auth/dataset';
import { createOneCollection } from '@fastgpt/service/core/dataset/collection/controller';
import {
  TrainingModeEnum,
  DatasetCollectionTypeEnum
} from '@fastgpt/global/core/dataset/constants';
import { checkDatasetLimit } from '@fastgpt/service/support/permission/limit/dataset';
import { predictDataLimitLength } from '@fastgpt/global/core/dataset/utils';
import { createTrainingBill } from '@fastgpt/service/support/wallet/bill/controller';
import { BillSourceEnum } from '@fastgpt/global/support/wallet/bill/constants';
import { getQAModel, getVectorModel } from '@/service/core/ai/model';
import { reloadCollectionChunks } from '@fastgpt/service/core/dataset/collection/utils';

export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
  try {
    await connectToDatabase();
    const {
      link,
      trainingType = TrainingModeEnum.chunk,
      chunkSize = 512,
      chunkSplitter,
      qaPrompt,
      ...body
    } = req.body as LinkCreateDatasetCollectionParams;

    const { teamId, tmbId, dataset } = await authDataset({
      req,
      authToken: true,
      authApiKey: true,
      datasetId: body.datasetId,
      per: 'w'
    });

    // 1. check dataset limit
    await checkDatasetLimit({
      teamId,
      freeSize: global.feConfigs?.subscription?.datasetStoreFreeSize,
      insertLen: predictDataLimitLength(trainingType, new Array(10))
    });

    // 2. create collection
    const collectionId = await createOneCollection({
      ...body,
      name: link,
      teamId,
      tmbId,
      type: DatasetCollectionTypeEnum.link,

      trainingType,
      chunkSize,
      chunkSplitter,
      qaPrompt,

      rawLink: link
    });

    // 3. create bill and start sync
    const { billId } = await createTrainingBill({
      teamId,
      tmbId,
      appName: 'core.dataset.collection.Sync Collection',
      billSource: BillSourceEnum.training,
      vectorModel: getVectorModel(dataset.vectorModel).name,
      agentModel: getQAModel(dataset.agentModel).name
    });
    await reloadCollectionChunks({
      collectionId,
      tmbId,
      billId
    });

    jsonRes(res, {
      data: { collectionId }
    });
  } catch (err) {
    jsonRes(res, {
      code: 500,
      error: err
    });
  }
}
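For reference, a minimal sketch of calling the new route from an HTTP client. The body fields mirror the handler's destructuring of `LinkCreateDatasetCollectionParams` above; the base URL, API key, `datasetId`, and the `trainingType` string value are assumptions:

```ts
// Hedged usage sketch for the new link-collection route; values are placeholders.
async function createLinkCollection(baseUrl: string, apiKey: string, datasetId: string) {
  const res = await fetch(`${baseUrl}/api/core/dataset/collection/create/link`, {
    method: 'POST',
    headers: {
      Authorization: `Bearer ${apiKey}`,
      'Content-Type': 'application/json'
    },
    body: JSON.stringify({
      datasetId,
      link: 'https://example.com/article', // page to scrape and split
      trainingType: 'chunk', // assumed string value of TrainingModeEnum.chunk
      chunkSize: 512
    })
  });
  const { data } = await res.json(); // jsonRes wraps the payload in { data }
  return data.collectionId as string;
}
```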
117 changes: 117 additions & 0 deletions projects/app/src/pages/api/core/dataset/collection/create/text.ts
@@ -0,0 +1,117 @@
/*
  Create one dataset collection
*/
import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response';
import { connectToDatabase } from '@/service/mongo';
import type { TextCreateDatasetCollectionParams } from '@fastgpt/global/core/dataset/api.d';
import { authDataset } from '@fastgpt/service/support/permission/auth/dataset';
import { createOneCollection } from '@fastgpt/service/core/dataset/collection/controller';
import {
  TrainingModeEnum,
  DatasetCollectionTypeEnum
} from '@fastgpt/global/core/dataset/constants';
import { splitText2Chunks } from '@fastgpt/global/common/string/textSplitter';
import { checkDatasetLimit } from '@fastgpt/service/support/permission/limit/dataset';
import { predictDataLimitLength } from '@fastgpt/global/core/dataset/utils';
import { pushDataToTrainingQueue } from '@/service/core/dataset/data/controller';
import { hashStr } from '@fastgpt/global/common/string/tools';
import { createTrainingBill } from '@fastgpt/service/support/wallet/bill/controller';
import { BillSourceEnum } from '@fastgpt/global/support/wallet/bill/constants';
import { getQAModel, getVectorModel } from '@/service/core/ai/model';

export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
  try {
    await connectToDatabase();
    const {
      name,
      text,
      trainingType = TrainingModeEnum.chunk,
      chunkSize = 512,
      chunkSplitter,
      qaPrompt,
      ...body
    } = req.body as TextCreateDatasetCollectionParams;

    const { teamId, tmbId, dataset } = await authDataset({
      req,
      authToken: true,
      authApiKey: true,
      datasetId: body.datasetId,
      per: 'w'
    });

    // 1. split text to chunks
    const { chunks } = splitText2Chunks({
      text,
      chunkLen: chunkSize,
      overlapRatio: trainingType === TrainingModeEnum.chunk ? 0.2 : 0,
      customReg: chunkSplitter ? [chunkSplitter] : []
    });

    // 2. check dataset limit
    await checkDatasetLimit({
      teamId,
      freeSize: global.feConfigs?.subscription?.datasetStoreFreeSize,
      insertLen: predictDataLimitLength(trainingType, chunks)
    });

    // 3. create collection and training bill
    const [collectionId, { billId }] = await Promise.all([
      createOneCollection({
        ...body,
        teamId,
        tmbId,
        type: DatasetCollectionTypeEnum.virtual,

        name,
        trainingType,
        chunkSize,
        chunkSplitter,
        qaPrompt,

        hashRawText: hashStr(text),
        rawTextLength: text.length
      }),
      createTrainingBill({
        teamId,
        tmbId,
        appName: name,
        billSource: BillSourceEnum.training,
        vectorModel: getVectorModel(dataset.vectorModel)?.name,
        agentModel: getQAModel(dataset.agentModel)?.name
      })
    ]);

    // 4. push chunks to training queue
    const insertResults = await pushDataToTrainingQueue({
      teamId,
      tmbId,
      collectionId,
      trainingMode: trainingType,
      prompt: qaPrompt,
      billId,
      data: chunks.map((text, index) => ({
        q: text,
        chunkIndex: index
      }))
    });

    jsonRes(res, {
      data: { collectionId, results: insertResults }
    });
  } catch (err) {
    jsonRes(res, {
      code: 500,
      error: err
    });
  }
}

export const config = {
  api: {
    bodyParser: {
      sizeLimit: '10mb'
    }
  }
};
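A matching sketch for the text route. Field names again come from the handler's destructuring; all values are placeholders. Note the `10mb` body-size limit configured above when sending large texts:

```ts
// Hedged usage sketch for the new text-collection route; values are placeholders.
async function createTextCollection(baseUrl: string, apiKey: string, datasetId: string) {
  const res = await fetch(`${baseUrl}/api/core/dataset/collection/create/text`, {
    method: 'POST',
    headers: {
      Authorization: `Bearer ${apiKey}`,
      'Content-Type': 'application/json'
    },
    body: JSON.stringify({
      datasetId,
      name: 'My text collection',
      text: 'Raw text that the handler will split into chunks...',
      chunkSize: 512
    })
  });
  const { data } = await res.json();
  return data; // { collectionId, results }, per the jsonRes call above
}
```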
2 changes: 1 addition & 1 deletion projects/app/src/web/core/dataset/api.ts
@@ -76,7 +76,7 @@ export const getDatasetCollectionById = (id: string) =>
export const postDatasetCollection = (data: CreateDatasetCollectionParams) =>
  POST<string>(`/core/dataset/collection/create`, data);
export const postCreateDatasetLinkCollection = (data: LinkCreateDatasetCollectionParams) =>
-  POST<{ collectionId: string }>(`/proApi/core/dataset/collection/create/link`, data);
+  POST<{ collectionId: string }>(`/core/dataset/collection/create/link`, data);

export const putDatasetCollectionById = (data: UpdateDatasetCollectionParams) =>
  POST(`/core/dataset/collection/update`, data);
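And a small call-site sketch for the updated web helper; the import path follows the file above, the `datasetId` is a placeholder, and the remaining params are assumed optional given the handler's defaults:

```ts
import { postCreateDatasetLinkCollection } from '@/web/core/dataset/api';

// Hedged call-site sketch; datasetId is a placeholder.
async function importLink(datasetId: string, link: string) {
  const { collectionId } = await postCreateDatasetLinkCollection({ datasetId, link });
  return collectionId;
}
```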
