Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 38 additions & 20 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

9 changes: 7 additions & 2 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,12 @@
"devDependencies": {
"@parcel/packager-ts": "^2.11.0",
"@parcel/transformer-typescript-types": "^2.11.0",
"@types/async": "^3.2.24",
"@types/google-protobuf": "^3.15.12",
"@types/js-yaml": "^4.0.9",
"@types/lodash": "^4.17.0",
"@types/node": "^20.11.16",
"@types/uuid": "^9.0.8",
"@typescript-eslint/eslint-plugin": "^6.19.1",
"@typescript-eslint/parser": "^6.19.1",
"@vitest/coverage-v8": "^1.3.1",
Expand All @@ -60,14 +63,16 @@
"dependencies": {
"@grpc/grpc-js": "^1.10.1",
"@grpc/proto-loader": "^0.7.10",
"async": "^3.2.5",
"chalk": "^5.3.0",
"clarifai-nodejs-grpc": "^10.0.9",
"clarifai-nodejs-grpc": "^10.3.2",
"csv-parse": "^5.5.5",
"from-protobuf-object": "^1.0.2",
"google-protobuf": "^3.21.2",
"js-yaml": "^4.1.0",
"lodash": "^4.17.21",
"safe-flat": "^2.1.0",
"uuidv4": "^6.2.13",
"uuid": "^9.0.1",
"winston": "^3.11.0",
"zod": "^3.22.4"
}
Expand Down
2 changes: 1 addition & 1 deletion src/client/app.ts
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ import * as yaml from "js-yaml";
import { validateWorkflow } from "../workflows/validate";
import { getYamlOutputInfoProto } from "../workflows/utils";
import { Model as ModelConstructor } from "./model";
import { uuid } from "uuidv4";
import { v4 as uuid } from "uuid";
import { fromProtobufObject } from "from-protobuf-object";
import { fromPartialProtobufObject } from "../utils/fromPartialProtobufObject";
import { flatten } from "safe-flat";
Expand Down
220 changes: 220 additions & 0 deletions src/client/dataset.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,220 @@
import {
DatasetVersion,
Dataset as GrpcDataset,
Input as GrpcInput,
} from "clarifai-nodejs-grpc/proto/clarifai/api/resources_pb";
import { UserError } from "../errors";
import { ClarifaiUrl, ClarifaiUrlHelper } from "../urls/helper";
import { AuthConfig } from "../utils/types";
import { Lister } from "./lister";
import { Input, InputBulkUpload } from "./input";
import {
DeleteDatasetVersionsRequest,
ListDatasetVersionsRequest,
PostDatasetVersionsRequest,
} from "clarifai-nodejs-grpc/proto/clarifai/api/service_pb";
import {
JavaScriptValue,
Struct,
} from "google-protobuf/google/protobuf/struct_pb";
import { promisifyGrpcCall } from "../utils/misc";
import { StatusCode } from "clarifai-nodejs-grpc/proto/clarifai/api/status/status_code_pb";

/**
 * Constructor options for {@link Dataset}.
 *
 * A dataset is identified either directly by `datasetId` (optionally together
 * with a `datasetVersionId`) or by a Clarifai `url`. The two variants below
 * make those alternatives mutually exclusive at the type level: whichever
 * identifier style is used, the other style's fields must be left undefined.
 */
type DatasetConfig =
| {
// Variant 1: identify the dataset by its ID; `url` must not be supplied.
authConfig?: AuthConfig;
datasetId: string;
datasetVersionId?: string;
url?: undefined;
}
| {
// Variant 2: identify the dataset by URL; the IDs are derived from the URL
// by the constructor, so they must not be supplied here.
authConfig?: AuthConfig;
datasetId?: undefined;
datasetVersionId?: undefined;
url: ClarifaiUrl;
};

/**
 * Client for a single Clarifai dataset.
 *
 * Wraps the dataset-version gRPC endpoints (create / delete / list) and offers
 * helpers that build input protos from a folder or a CSV file and bulk-upload
 * them into this dataset.
 */
export class Dataset extends Lister {
  /** Proto holding this dataset's ID and (optional) version. */
  private readonly info: GrpcDataset = new GrpcDataset();
  /** Default batch size for uploads; also used as the cap for CSV uploads. */
  private readonly batchSize: number = 128;
  /** Input client used to perform the bulk uploads. */
  private readonly input: Input;

  /**
   * @param config - Either `datasetId` (with an optional `datasetVersionId`)
   *   or a Clarifai `url`, plus optional auth configuration.
   * @throws UserError when both `url` and `datasetId` are supplied.
   */
  constructor({ authConfig, datasetId, url, datasetVersionId }: DatasetConfig) {
    if (url && datasetId) {
      throw new UserError("You can only specify one of url or dataset_id.");
    }
    if (url) {
      // The third tuple entry is skipped, exactly as in the original
      // destructuring; only user, app, dataset and version IDs are used.
      const [userId, appId, , _datasetId, _datasetVersionId] =
        ClarifaiUrlHelper.splitClarifaiUrl(url);
      // Work on a copy: the caller's config object may be shared with other
      // clients and must not have its userId/appId rewritten behind its back.
      if (authConfig) authConfig = { ...authConfig, userId, appId };
      datasetId = _datasetId;
      datasetVersionId = _datasetVersionId;
    }

    super({ authConfig });

    if (datasetId != null) {
      this.info.setId(datasetId);
    }
    // A version message is always attached; its ID is only set when known.
    const version = new DatasetVersion();
    if (datasetVersionId != null) {
      version.setId(datasetVersionId);
    }
    this.info.setVersion(version);
    this.input = new Input({ authConfig });
  }

  /**
   * Creates a new version of this dataset.
   *
   * @param id - ID for the new dataset version.
   * @param description - Description stored on the version.
   * @param metadata - Optional metadata, converted to a protobuf `Struct`.
   * @returns The first dataset version contained in the response.
   * @throws Error when the response status is not `SUCCESS`, or when a
   *   successful response contains no dataset version.
   */
  async createVersion({
    id,
    description,
    metadata = {},
  }: {
    id: string;
    description: string;
    metadata?: Record<string, JavaScriptValue>;
  }): Promise<DatasetVersion.AsObject> {
    const datasetVersion = new DatasetVersion();
    datasetVersion.setId(id);
    datasetVersion.setDescription(description);
    datasetVersion.setMetadata(Struct.fromJavaScript(metadata));

    const request = new PostDatasetVersionsRequest();
    request.setUserAppId(this.userAppId);
    request.setDatasetId(this.info.getId());
    request.setDatasetVersionsList([datasetVersion]);

    const postDatasetVersions = promisifyGrpcCall(
      this.STUB.client.postDatasetVersions,
      this.STUB.client,
    );

    const response = await this.grpcRequest(postDatasetVersions, request);
    const responseObject = response.toObject();
    if (responseObject.status?.code !== StatusCode.SUCCESS) {
      throw new Error(responseObject.status?.description);
    }
    console.info("\nDataset Version created\n%s", response.getStatus());

    // Indexing an empty list would yield `undefined` and silently violate the
    // declared return type, so fail loudly instead.
    const [createdVersion] = responseObject.datasetVersionsList;
    if (createdVersion === undefined) {
      throw new Error("No dataset version was returned in the response");
    }
    return createdVersion;
  }

  /**
   * Deletes one version of this dataset.
   *
   * @param versionId - ID of the dataset version to delete.
   * @throws Error when the response status is not `SUCCESS`.
   */
  async deleteVersion(versionId: string): Promise<void> {
    const request = new DeleteDatasetVersionsRequest();
    request.setUserAppId(this.userAppId);
    request.setDatasetId(this.info.getId());
    request.setDatasetVersionIdsList([versionId]);

    const deleteDatasetVersions = promisifyGrpcCall(
      this.STUB.client.deleteDatasetVersions,
      this.STUB.client,
    );
    const response = await this.grpcRequest(deleteDatasetVersions, request);
    const responseObject = response.toObject();
    if (responseObject.status?.code !== StatusCode.SUCCESS) {
      throw new Error(responseObject.status?.description);
    }
    console.info("\nDataset Version Deleted\n%s", response.getStatus());
  }

  /**
   * Lists the versions of this dataset, one page per yielded array.
   *
   * @param pageNo - Forwarded to `listPagesGenerator` (presumably the page to
   *   fetch — confirm against `Lister`).
   * @param perPage - Forwarded to `listPagesGenerator` (presumably the page
   *   size — confirm against `Lister`).
   * @yields The `datasetVersionsList` of each response page.
   */
  async *listVersions(
    pageNo?: number,
    perPage?: number,
  ): AsyncGenerator<DatasetVersion.AsObject[], void, unknown> {
    const request = new ListDatasetVersionsRequest();
    request.setUserAppId(this.userAppId);
    request.setDatasetId(this.info.getId());

    const listDatasetVersions = promisifyGrpcCall(
      this.STUB.client.listDatasetVersions,
      this.STUB.client,
    );

    const pages = this.listPagesGenerator(
      listDatasetVersions,
      request,
      pageNo,
      perPage,
    );

    for await (const page of pages) {
      yield page.toObject().datasetVersionsList;
    }
  }

  /**
   * Builds inputs from the files in a folder and bulk-uploads them into this
   * dataset.
   *
   * @param folderPath - Folder handed to the `Input` folder helpers.
   * @param inputType - `"image"` or `"text"`.
   * @param labels - Forwarded to the `Input` folder helpers; defaults to
   *   `false`.
   * @param batchSize - Batch size for the bulk upload; defaults to the
   *   instance batch size (128).
   * @param uploadProgressEmitter - Optional emitter forwarded to `bulkUpload`.
   * @throws UserError when `inputType` is neither `"image"` nor `"text"`.
   */
  async uploadFromFolder({
    folderPath,
    inputType,
    labels = false,
    batchSize = this.batchSize,
    uploadProgressEmitter,
  }: {
    folderPath: string;
    inputType: "image" | "text";
    labels?: boolean;
    batchSize?: number;
    uploadProgressEmitter?: InputBulkUpload;
  }): Promise<void> {
    // Runtime check kept for untyped (plain JavaScript) callers.
    if (!["image", "text"].includes(inputType)) {
      throw new UserError("Invalid input type");
    }
    const datasetId = this.info.getId();
    const inputProtos: GrpcInput[] =
      inputType === "image"
        ? Input.getImageInputsFromFolder({ folderPath, datasetId, labels })
        : Input.getTextInputsFromFolder({ folderPath, datasetId, labels });

    await this.input.bulkUpload({
      inputs: inputProtos,
      batchSize,
      uploadProgressEmitter,
    });
  }

  /**
   * Builds inputs from a CSV file and bulk-uploads them into this dataset.
   *
   * @param csvPath - Path to the CSV file; must end with `.csv`.
   * @param inputType - `"image"`, `"text"`, `"video"` or `"audio"`; defaults
   *   to `"text"`.
   * @param csvType - `"raw"`, `"url"` or `"file"`. `"raw"` is only accepted
   *   together with the `"text"` input type.
   * @param labels - Forwarded to `Input.getInputsFromCsv`; defaults to `true`.
   * @param batchSize - Requested batch size; capped at the instance batch
   *   size (128).
   * @param uploadProgressEmitter - Optional emitter forwarded to `bulkUpload`.
   * @throws UserError when any of the argument checks above fails.
   */
  async uploadFromCSV({
    csvPath,
    inputType = "text",
    csvType,
    labels = true,
    batchSize = this.batchSize,
    uploadProgressEmitter,
  }: {
    csvPath: string;
    inputType?: "image" | "text" | "video" | "audio";
    csvType: "raw" | "url" | "file";
    labels?: boolean;
    batchSize?: number;
    uploadProgressEmitter?: InputBulkUpload;
  }): Promise<void> {
    if (!["image", "text", "video", "audio"].includes(inputType)) {
      throw new UserError(
        "Invalid input type, it should be image, text, audio, or video",
      );
    }
    if (!["raw", "url", "file"].includes(csvType)) {
      // The message names the value that is actually accepted ("file").
      throw new UserError("Invalid csv type, it should be raw, url, or file");
    }
    if (!csvPath.endsWith(".csv")) {
      throw new UserError("csvPath should be a csv file");
    }
    if (csvType === "raw" && inputType !== "text") {
      throw new UserError("Only text input type is supported for raw csv type");
    }

    // Never exceed the instance batch size (128), as the original code did
    // with a hard-coded literal.
    const cappedBatchSize = Math.min(this.batchSize, batchSize);
    const inputProtos = await Input.getInputsFromCsv({
      csvPath,
      inputType,
      csvType,
      datasetId: this.info.getId(),
      labels,
    });
    await this.input.bulkUpload({
      inputs: inputProtos,
      batchSize: cappedBatchSize,
      uploadProgressEmitter,
    });
  }
}
Loading