diff --git a/README.md b/README.md index d8ee276ce..db16b52ae 100644 --- a/README.md +++ b/README.md @@ -121,6 +121,7 @@ Performance of different validators by [json-schema-benchmark](https://github.co - "union" keyword and user-defined keywords (can be used inside "metadata" member of the schema) - supports [browsers](https://ajv.js.org/guide/environments.html#browsers) and Node.js 10.x - current - [asynchronous loading](https://ajv.js.org/guide/managing-schemas.html#asynchronous-schema-loading) of referenced schemas during compilation +- optional parallel loading of referenced schemas during compilation via `enableParallelLoading` - "All errors" validation mode with [option allErrors](https://ajv.js.org/options.html#allerrors) - [error messages with parameters](https://ajv.js.org/api.html#validation-errors) describing error reasons to allow error message generation - i18n error messages support with [ajv-i18n](https://github.com/ajv-validator/ajv-i18n) package diff --git a/docs/api.md b/docs/api.md index 2a0680730..e0147a005 100644 --- a/docs/api.md +++ b/docs/api.md @@ -119,9 +119,9 @@ console.log(parseMyData.message) // property x not allowed ### ajv.compileAsync(schema: object, meta?: boolean): Promise < Function > -Asynchronous version of `compile` method that loads missing remote schemas using asynchronous function in `options.loadSchema`. This function returns a Promise that resolves to a validation function. An optional callback passed to `compileAsync` will be called with 2 parameters: error (or null) and validating function. The returned promise will reject (and the callback will be called with an error) when: +Asynchronous version of `compile` method that loads missing remote schemas using asynchronous function in `options.loadSchema` (optionally in parallel when `enableParallelLoading` is set to `true`). This function returns a `Promise` that resolves to a validation function. 
An optional callback passed to `compileAsync` will be called with 2 parameters: error (or `null`) and validating function. The returned promise will reject (and the callback will be called with an error) when: -- missing schema can't be loaded (`loadSchema` returns a Promise that rejects). +- missing schema can't be loaded (`loadSchema` returns a `Promise` that rejects). - a schema containing a missing reference is loaded, but the reference cannot be resolved. - schema (or some loaded/referenced schema) is invalid. diff --git a/docs/guide/combining-schemas.md b/docs/guide/combining-schemas.md index 410c7fb6e..141cd31b7 100644 --- a/docs/guide/combining-schemas.md +++ b/docs/guide/combining-schemas.md @@ -50,7 +50,7 @@ See [Options](../options.md) and [addSchema](../api.md#add-schema) method. - The actual location of the schema file in the file system is not used. - You can pass the identifier of the schema as the second parameter of `addSchema` method or as a property name in `schemas` option. This identifier can be used instead of (or in addition to) schema \$id. - You cannot have the same \$id (or the schema identifier) used for more than one schema - the exception will be thrown. -- You can implement dynamic resolution of the referenced schemas using `compileAsync` method. In this way you can store schemas in any system (files, web, database, etc.) and reference them without explicitly adding to Ajv instance. See [Asynchronous schema compilation](./managing-schemas.md#asynchronous-schema-compilation). +- You can implement dynamic resolution of the referenced schemas using `compileAsync` method. In this way you can store schemas in any system (files, web, database, etc.) and reference them without explicitly adding to `Ajv` instance. To preload references in parallel, pass `enableParallelLoading: true`. See [Asynchronous schema compilation](./managing-schemas.md#asynchronous-schema-compilation). 
::: ## Extending recursive schemas diff --git a/docs/guide/managing-schemas.md b/docs/guide/managing-schemas.md index 8923b36e2..d79deb915 100644 --- a/docs/guide/managing-schemas.md +++ b/docs/guide/managing-schemas.md @@ -228,7 +228,7 @@ The above is possible because when the schema has `$id` attribute `compile` meth There are cases when you need to have a large collection of schemas stored in some database or on the remote server. In this case you are likely to use schema `$id` as some resource identifier to retrieve it - either network URI or database ID. -You can use `compileAsync` [method](./api.md#api-compileAsync) to asynchronously load the schemas as they are compiled, loading the schemas that are referenced from compiled schemas on demand. Ajv itself does not do any IO operations, it uses the function you supply via `loadSchema` [option](./api.md#options) to load schema from the passed ID. This function should return `Promise` that resolves to the schema (you can use async function, as in the example). +You can use `compileAsync` [method](./api.md#api-compileAsync) to asynchronously load the schemas as they are compiled, loading the schemas that are referenced from compiled schemas on demand. Ajv itself does not do any IO operations, it uses the function you supply via `loadSchema` [option](./api.md#options) to load schema from the passed ID. This function should return `Promise` that resolves to the schema (you can use async function, as in the example). To preload external references in **parallel**, pass `enableParallelLoading: true`. 
Example: diff --git a/docs/options.md b/docs/options.md index e8465e815..b8005c464 100644 --- a/docs/options.md +++ b/docs/options.md @@ -47,6 +47,7 @@ const defaultOptions = { schemas: {}, logger: undefined, loadSchema: undefined, // *, function(uri: string): Promise {} + enableParallelLoading: false, // options to modify validated data: removeAdditional: false, useDefaults: false, // * @@ -264,6 +265,10 @@ Option values: Asynchronous function that will be used to load remote schemas when `compileAsync` [method](#api-compileAsync) is used and some reference is missing (option `missingRefs` should NOT be 'fail' or 'ignore'). This function should accept remote schema uri as a parameter and return a Promise that resolves to a schema. See example in [Asynchronous compilation](./guide/managing-schemas.md#asynchronous-schema-compilation). +### enableParallelLoading + +If `true`, `Ajv` will pre-load the external schemas referenced with `$ref`, `$recursiveRef` or `$dynamicRef` during `compileAsync` by calling `loadSchema` for them in parallel before compilation starts. If any of these schemas cannot be loaded, `compileAsync` rejects with a single error that lists the references that failed to load. Default is `false` to keep the legacy sequential loading behavior for backward compatibility. 
+ ## Options to modify validated data ### removeAdditional diff --git a/lib/core.ts b/lib/core.ts index 6ceedf541..982e1b4d2 100644 --- a/lib/core.ts +++ b/lib/core.ts @@ -58,10 +58,11 @@ import MissingRefError from "./compile/ref_error" import {getRules, ValidationRules, Rule, RuleGroup, JSONType} from "./compile/rules" import {SchemaEnv, compileSchema, resolveSchema} from "./compile" import {Code, ValueScope} from "./compile/codegen" -import {normalizeId, getSchemaRefs} from "./compile/resolve" +import {normalizeId, getFullPath, resolveUrl, getSchemaRefs} from "./compile/resolve" import {getJSONTypes} from "./compile/validate/dataType" import {eachItem} from "./compile/util" import * as $dataRefSchema from "./refs/data.json" +import * as traverse from "json-schema-traverse" import DefaultUriResolver from "./runtime/uri" @@ -85,6 +86,8 @@ const EXT_SCOPE_NAMES = new Set([ "Error", ]) +const REF_KEYS = ["$ref", "$recursiveRef", "$dynamicRef"] as const + export type Options = CurrentOptions & DeprecatedOptions export interface CurrentOptions { @@ -116,6 +119,7 @@ export interface CurrentOptions { schemas?: AnySchema[] | {[Key in string]?: AnySchema} logger?: Logger | false loadSchema?: (uri: string) => Promise + enableParallelLoading?: boolean // options to modify validated data: removeAdditional?: boolean | "all" | "failing" useDefaults?: boolean | "empty" @@ -424,6 +428,9 @@ export default class Ajv { ): Promise { await loadMetaSchema.call(this, _schema.$schema) const sch = this._addSchema(_schema, _meta) + if (this.opts.enableParallelLoading) { + await preloadExternalSchemas.call(this, sch, _meta) + } return sch.validate || _compileAsync.call(this, sch) } @@ -465,6 +472,55 @@ export default class Ajv { delete this._loading[ref] } } + + /** + * Loads in parallel every external schema referenced from `sch` and adds the loaded schemas to the instance, + * then repeats this for the references found in them until no unseen reference is left. + * Throws one aggregated error after a round in which any of the loads failed. + */ + async function preloadExternalSchemas( + this: Ajv, + sch: SchemaEnv, + _meta?: boolean + ): Promise { + const seen = new Set() + let pending = collectMissingExternalRefs.call(this, sch.schema, sch.baseId, seen) + while 
(pending.length) { + const refs = pending + pending = [] + // Promise.allSettled is not available on Node.js 10.x, so every load is settled by hand + const results = await Promise.all( + refs.map((ref) => + _loadSchema.call(this, ref).then( + (value) => ({status: "fulfilled" as const, value}), + (reason: unknown) => ({status: "rejected" as const, reason}) + ) + ) + ) + const errors: {ref: string; error: unknown}[] = [] + + for (let i = 0; i < results.length; i++) { + const result = results[i] + const ref = refs[i] + if (result.status === "rejected") { + errors.push({ref, error: result.reason}) + continue + } + const _schema = result.value + if (!this.refs[ref]) await loadMetaSchema.call(this, _schema.$schema) + if (!this.refs[ref]) this.addSchema(_schema, ref, _meta) + const schemaBaseId = + typeof _schema == "object" + ? normalizeId(_schema[this.opts.schemaId] || ref) + : normalizeId(ref) + pending.push(...collectMissingExternalRefs.call(this, _schema, schemaBaseId, seen)) + } + + if (errors.length) { + throw aggregateLoadErrors(errors) + } + } + } } // Adds schema to the instance @@ -758,6 +814,66 @@ export interface ErrorsTextOptions { dataVar?: string } +/** + * Walks `schema` and returns the ids of the schemas referenced by its `$ref`, `$recursiveRef` and + * `$dynamicRef` keywords that are neither registered in this instance nor already in `seen`. + * Every id returned is also added to `seen`, so it is reported only once. + */ +function collectMissingExternalRefs( + this: Ajv, + schema: AnySchema, + baseId: string, + seen: Set +): string[] { + if (typeof schema != "object") return [] + const {schemaId, uriResolver} = this.opts + const missing: string[] = [] + const baseIds: {[key: string]: string | undefined} = { + "": normalizeId(schema[schemaId] || baseId), + } + + traverse(schema, {allKeys: true}, (sch, jsonPtr, _, parentJsonPtr) => { + if (typeof sch != "object") return + // the root schema (parentJsonPtr is undefined) is visited too, otherwise a root-level $ref is missed; + // its own id is already in baseIds[""], so only ids of nested schemas change the base + const parentBaseId = parentJsonPtr === undefined ? undefined : baseIds[parentJsonPtr] + let innerBaseId = parentBaseId ?? baseIds[""] ?? "" + if (parentJsonPtr !== undefined && typeof sch[schemaId] == "string") { + innerBaseId = normalizeId( + innerBaseId ? 
uriResolver.resolve(innerBaseId, sch[schemaId]) : sch[schemaId] + ) + } + baseIds[jsonPtr] = innerBaseId + + for (const refKey of REF_KEYS) { + const ref = sch[refKey] + if (typeof ref != "string") continue + const fullRef = resolveUrl(uriResolver, innerBaseId, ref) + const refSchema = normalizeId(getFullPath(uriResolver, fullRef)) + if (!refSchema) continue + if (this.refs[refSchema] || this.schemas[refSchema]) continue + if (seen.has(refSchema)) continue + seen.add(refSchema) + missing.push(refSchema) + } + }) + + return missing +} + +/** + * Creates a single Error whose message lists the references that failed to load; + * the individual failures are available in its `errors` property. + */ +function aggregateLoadErrors(errors: {ref: string; error: unknown}[]): Error { + const message = `Failed to load ${errors.length} schema reference(s): ${errors + .map(({ref}) => ref) + .join(", ")}` + const err = new Error(message) as Error & {errors?: {ref: string; error: unknown}[]} + err.errors = errors + return err +} + function checkOptions( this: Ajv, checkOpts: OptionsInfo, diff --git a/spec/async.spec.ts b/spec/async.spec.ts index 5de05c8ab..b26803a4b 100644 --- a/spec/async.spec.ts +++ b/spec/async.spec.ts @@ -121,6 +121,99 @@ describe("compileAsync method", () => { }) }) + it("should start loading multiple external refs in parallel when enabled", () => { + const started: string[] = [] + const resolvers = new Map void>() + const deferred = new Set([ + "http://example.com/object.json", + "http://example.com/other.json", + ]) + + const customAjv = new _Ajv({ + enableParallelLoading: true, + loadSchema(uri) { + started.push(uri) + if (deferred.has(uri)) { + return new Promise((resolve) => { + resolvers.set(uri, resolve) + }) + } + if (SCHEMAS[uri]) return Promise.resolve(SCHEMAS[uri]) + return Promise.reject(new Error("404")) + }, + }) + + const schema = { + $id: "http://example.com/parent-parallel.json", + type: "object", + properties: { + a: {$ref: "object.json"}, + b: {$ref: "other.json"}, + }, + } + + const p = customAjv.compileAsync(schema) + + return Promise.resolve() + .then(() => { + // With parallel loading enabled, 
Ajv should kick off both loadSchema calls immediately. + // If it were still sequential, we'd only see the first ref here until it resolves. + started.should.include.members([ + "http://example.com/object.json", + "http://example.com/other.json", + ]) + resolvers.get("http://example.com/object.json")?.(SCHEMAS["http://example.com/object.json"]) + resolvers.get("http://example.com/other.json")?.(SCHEMAS["http://example.com/other.json"]) + return p + }) + .then((validate) => { + validate.should.be.a("function") + }) + }) + + it("should not preload external refs in parallel by default", () => { + const started: string[] = [] + const resolvers = new Map void>() + const deferred = new Set(["http://example.com/object.json"]) + + const customAjv = new _Ajv({ + loadSchema(uri) { + started.push(uri) + if (deferred.has(uri)) { + return new Promise((resolve) => { + resolvers.set(uri, resolve) + }) + } + if (SCHEMAS[uri]) return Promise.resolve(SCHEMAS[uri]) + return Promise.reject(new Error("404")) + }, + }) + + const schema = { + $id: "http://example.com/parent-serial.json", + type: "object", + properties: { + a: {$ref: "object.json"}, + b: {$ref: "other.json"}, + }, + } + + const p = customAjv.compileAsync(schema) + + return Promise.resolve() + .then(() => { + // Leave the first ref pending to force the legacy sequential path. + // In sequential mode Ajv can't discover the next external ref until this one resolves, + // so only "object.json" should have started at this point. + started.should.deep.equal(["http://example.com/object.json"]) + resolvers.get("http://example.com/object.json")?.(SCHEMAS["http://example.com/object.json"]) + return p + }) + .then((validate) => { + validate.should.be.a("function") + }) + }) + it("should correctly load schemas when missing reference has JSON path", () => { const schema = { $id: "http://example.com/parent.json",