-
Notifications
You must be signed in to change notification settings - Fork 2
Fallback to WASM tree-sitter parser #393
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,136 @@ | ||
| import { join } from 'path'; | ||
| import Parser from 'web-tree-sitter'; | ||
| import { DocumentType } from '../document/Document'; | ||
| import { readBufferIfExists } from '../utils/File'; | ||
|
|
||
/**
 * Optional overrides accepted by `GrammarManager.getInstance`.
 * Every field may be omitted; the manager fills in the defaults described below.
 */
export interface GrammarConfig {
    /**
     * Directory that holds the grammar `.wasm` files.
     * Defaults to `<__dirname>/wasm` when `__dirname` is defined, otherwise `./wasm`.
     */
    wasmBasePath?: string;
    /** Path of the YAML grammar. Defaults to `tree-sitter-yaml.wasm` inside `wasmBasePath`. */
    yamlGrammarPath?: string;
    /** Path of the JSON grammar. Defaults to `tree-sitter-json.wasm` inside `wasmBasePath`. */
    jsonGrammarPath?: string;
    /** Total number of load attempts per grammar (the first try counts). Defaults to 3. */
    maxRetries?: number;
    /**
     * Base delay between attempts, in milliseconds; the wait after attempt `n` is
     * `retryDelay * n`. Defaults to 100.
     */
    retryDelay?: number;
}
|
|
||
/**
 * Process-wide singleton that initializes the web-tree-sitter runtime and lazily loads,
 * caches and hands out the YAML/JSON grammars.
 *
 * Concurrent requests for the same grammar share one in-flight load, and concurrent
 * first-time requests share one runtime initialization.
 */
export class GrammarManager {
    private static instance: GrammarManager | undefined;
    private initialized = false;
    /** In-flight `Parser.init` call, shared by callers that arrive before it settles. */
    private initPromise: Promise<void> | undefined;
    private readonly grammarCache = new Map<DocumentType, Parser.Language>();
    private readonly loadingPromises = new Map<DocumentType, Promise<Parser.Language>>();
    private readonly config: Required<GrammarConfig>;

    private constructor(config: GrammarConfig = {}) {
        const basePath = config.wasmBasePath ?? this.getDefaultWasmPath();

        this.config = {
            yamlGrammarPath: config.yamlGrammarPath ?? join(basePath, 'tree-sitter-yaml.wasm'),
            jsonGrammarPath: config.jsonGrammarPath ?? join(basePath, 'tree-sitter-json.wasm'),
            maxRetries: config.maxRetries ?? 3,
            retryDelay: config.retryDelay ?? 100,
            wasmBasePath: basePath,
        };
    }

    /** Resolves the directory that is expected to contain the grammar WASM files. */
    private getDefaultWasmPath(): string {
        // In bundled environment, WASM files are in the same directory as the bundle
        if (typeof __dirname !== 'undefined') {
            // __dirname points to the bundle directory, WASM files are in ./wasm/
            return join(__dirname, 'wasm');
        }
        // Fallback for different environments
        return './wasm';
    }

    /**
     * Returns the shared manager, creating it on first use.
     *
     * @param config Applied only when the instance is created; it is ignored on every
     *               later call because the existing instance is returned unchanged.
     */
    public static getInstance(config?: GrammarConfig): GrammarManager {
        if (!GrammarManager.instance) {
            GrammarManager.instance = new GrammarManager(config);
        }
        return GrammarManager.instance;
    }

    /**
     * Initializes the web-tree-sitter runtime once.
     *
     * Callers that arrive while initialization is in flight await the same promise, so
     * loading several grammars in parallel (see `preloadGrammars`) does not issue
     * overlapping `Parser.init` calls. A failed initialization is forgotten so that a
     * later call can try again.
     */
    private async ensureInitialized(): Promise<void> {
        if (this.initialized) return;

        if (!this.initPromise) {
            this.initPromise = this.initializeRuntime();
        }
        const pending = this.initPromise;

        try {
            await pending;
        } catch (error: unknown) {
            // Only reset if nobody has already started a fresh attempt.
            if (this.initPromise === pending) {
                this.initPromise = undefined;
            }
            throw error;
        }
    }

    /** Performs the actual `Parser.init` call and records success. */
    private async initializeRuntime(): Promise<void> {
        await Parser.init({
            locateFile: (scriptName: string) => {
                // The core runtime WASM is looked up one directory above the grammar directory.
                if (scriptName === 'tree-sitter.wasm') {
                    return join(this.config.wasmBasePath, '..', 'tree-sitter.wasm');
                }
                return scriptName;
            },
        });

        this.initialized = true;
    }

    /**
     * Reads and loads the grammar for `type`, making up to `maxRetries` attempts in total
     * and waiting `retryDelay * attempt` milliseconds between them.
     *
     * @throws Error when every attempt fails; the message carries the last failure's message.
     */
    private async loadGrammarWithRetry(type: DocumentType): Promise<Parser.Language> {
        const grammarPath = this.getGrammarPath(type);
        const { maxRetries, retryDelay } = this.config;

        let lastError: Error | undefined;

        for (let attempt = 1; attempt <= maxRetries; attempt++) {
            try {
                const wasmBuffer = readBufferIfExists(grammarPath);
                return await Parser.Language.load(wasmBuffer);
            } catch (error: unknown) {
                // Anything can be thrown; normalize so the final message is always meaningful.
                lastError = error instanceof Error ? error : new Error(String(error));

                if (attempt < maxRetries) {
                    await new Promise((resolve) => setTimeout(resolve, retryDelay * attempt));
                }
            }
        }

        // With maxRetries < 1 the loop never runs and there is no underlying error to report.
        const reason = lastError?.message ?? 'no load attempt was made (maxRetries is less than 1)';
        throw new Error(`Failed to load ${type} grammar after ${maxRetries} attempts: ${reason}`);
    }

    /**
     * Returns the grammar for `type`, loading it on first use.
     *
     * A cached grammar is returned immediately; a load already in progress is joined
     * rather than duplicated.
     *
     * @throws Error when the runtime cannot be initialized or the grammar cannot be loaded.
     */
    public async loadGrammar(type: DocumentType): Promise<Parser.Language> {
        // Return cached grammar if available
        const cached = this.grammarCache.get(type);
        if (cached) {
            return cached;
        }

        // Return existing loading promise if in progress
        const existingPromise = this.loadingPromises.get(type);
        if (existingPromise) {
            return await existingPromise;
        }

        // Start new loading process
        const loadingPromise = this.loadGrammarInternal(type);
        this.loadingPromises.set(type, loadingPromise);

        try {
            const grammar = await loadingPromise;
            // If clearCache() ran while we were loading, this load is no longer the tracked
            // one: hand the grammar to the caller but do not repopulate the cleared cache.
            if (this.loadingPromises.get(type) === loadingPromise) {
                this.grammarCache.set(type, grammar);
            }
            return grammar;
        } finally {
            // Never remove an entry that belongs to a newer load started after clearCache().
            if (this.loadingPromises.get(type) === loadingPromise) {
                this.loadingPromises.delete(type);
            }
        }
    }

    /** Ensures the runtime is ready, then loads the grammar with retries. */
    private async loadGrammarInternal(type: DocumentType): Promise<Parser.Language> {
        await this.ensureInitialized();
        return await this.loadGrammarWithRetry(type);
    }

    /**
     * Loads the given grammars in parallel (YAML and JSON by default).
     * Rejects as soon as any one of them fails to load.
     */
    public async preloadGrammars(types: DocumentType[] = [DocumentType.YAML, DocumentType.JSON]): Promise<void> {
        await Promise.all(types.map((type) => this.loadGrammar(type)));
    }

    /** Reports whether the grammar for `type` is currently cached. */
    public isGrammarLoaded(type: DocumentType): boolean {
        return this.grammarCache.has(type);
    }

    /**
     * Drops all cached grammars and forgets in-flight loads.
     * The runtime initialization state is left untouched.
     */
    public clearCache(): void {
        this.grammarCache.clear();
        this.loadingPromises.clear();
    }

    /** Returns the configured WASM path for `type`; any non-YAML type maps to the JSON grammar. */
    public getGrammarPath(type: DocumentType): string {
        return type === DocumentType.YAML ? this.config.yamlGrammarPath : this.config.jsonGrammarPath;
    }
}
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,83 @@ | ||
| import TreeSitterYaml from '@tree-sitter-grammars/tree-sitter-yaml'; | ||
| import Parser from 'tree-sitter'; | ||
| import TreeSitterJson from 'tree-sitter-json'; | ||
| import { LoggerFactory } from '../telemetry/LoggerFactory'; | ||
| import { WasmParserFactory } from './WasmParserFactory'; | ||
|
|
||
| const log = LoggerFactory.getLogger('ParserFactory'); | ||
|
|
||
/**
 * Source of tree-sitter parsers for the two supported document syntaxes.
 * Implemented by both the native-backed and the WASM-backed factories.
 */
export interface ParserFactory {
    /**
     * Optional asynchronous setup step. Factories that need no asynchronous
     * preparation may leave it out.
     */
    initialize?(): Promise<void>;
    /** Returns a parser configured for YAML documents. */
    createYamlParser(): Parser;
    /** Returns a parser configured for JSON documents. */
    createJsonParser(): Parser;
}
|
|
||
/**
 * Parser factory backed by the native tree-sitter bindings.
 *
 * If the native parsers cannot be constructed or configured, the factory switches to a
 * `WasmParserFactory` and delegates every request to it.
 */
class NativeParserFactory implements ParserFactory {
    /** Set only when native initialization fully succeeded. */
    private readonly yamlParser?: Parser;
    /** Set only when native initialization fully succeeded. */
    private readonly jsonParser?: Parser;
    /** Set only when native initialization failed. */
    private wasmFallback?: WasmParserFactory;
    private readonly nativeFailed: boolean = false;

    constructor() {
        try {
            // Build both parsers in locals first so a failure half-way through never leaves
            // a partially configured pair on the instance.
            const yamlParser = new Parser();
            yamlParser.setLanguage(TreeSitterYaml as unknown as Parser.Language);

            const jsonParser = new Parser();
            jsonParser.setLanguage(TreeSitterJson as unknown as Parser.Language);

            this.yamlParser = yamlParser;
            this.jsonParser = jsonParser;

            log.info('Native tree-sitter parsers initialized successfully');
        } catch (error: unknown) {
            // Keep the cause in the log, and do not touch the native `Parser` again here:
            // constructing it is one of the operations that may just have thrown.
            log.error(error, 'Native tree-sitter initialization failed, will use WASM fallback');
            this.nativeFailed = true;
            this.initializeWasmFallback();
        }
    }

    /**
     * Creates the WASM factory and starts its initialization in the background.
     * Initialization failures are logged, not rethrown.
     */
    private initializeWasmFallback(): void {
        log.info('Initializing WASM fallback...');
        this.wasmFallback = new WasmParserFactory();
        // NOTE(review): initialize() is not awaited, so a parser may be requested before it
        // completes — confirm WasmParserFactory handles that.
        this.wasmFallback.initialize().catch((error: unknown) => {
            log.error(error, 'WASM fallback initialization failed');
        });
    }

    /** Returns the YAML parser: the WASM one after a native failure, otherwise the native one. */
    createYamlParser(): Parser {
        if (this.nativeFailed && this.wasmFallback) {
            return this.wasmFallback.createYamlParser();
        }
        return NativeParserFactory.requireParser(this.yamlParser, 'YAML');
    }

    /** Returns the JSON parser: the WASM one after a native failure, otherwise the native one. */
    createJsonParser(): Parser {
        if (this.nativeFailed && this.wasmFallback) {
            return this.wasmFallback.createJsonParser();
        }
        return NativeParserFactory.requireParser(this.jsonParser, 'JSON');
    }

    /**
     * Narrows an optional native parser to a definite one.
     * Not expected to throw in practice: the parsers are missing only when native setup
     * failed, and in that case the WASM fallback is used instead.
     */
    private static requireParser(parser: Parser | undefined, syntax: string): Parser {
        if (!parser) {
            throw new Error(`No ${syntax} tree-sitter parser is available`);
        }
        return parser;
    }
}
|
|
||
/**
 * Environment detection: reports whether the WASM tree-sitter implementation was
 * explicitly requested. Only the exact value 'true' of CLOUDFORMATIONLSP_USE_WASM counts.
 */
function shouldForceWasm(): boolean {
    const requested = process.env.CLOUDFORMATIONLSP_USE_WASM;
    return requested === 'true';
}
|
|
||
/**
 * Picks the parser factory for this process.
 *
 * The WASM factory is used when it has been forced through the environment; otherwise
 * the native factory (which carries its own WASM fallback) is used. WASM initialization
 * is started here but not awaited — it completes in the background and a failure is
 * only logged.
 */
function selectParserFactory(): ParserFactory {
    if (!shouldForceWasm()) {
        log.info('Using native tree-sitter implementation with WASM fallback');
        return new NativeParserFactory();
    }

    // NOTE(review): we probably want to run some of the e2e/integration tests with this
    // flag enabled so we know everything is working correctly.
    log.info('Forcing WASM tree-sitter implementation (CLOUDFORMATIONLSP_USE_WASM=true)');
    const forcedWasmFactory = new WasmParserFactory();

    // NOTE(review): if initialization fails, should it fall back to native?
    forcedWasmFactory.initialize().catch((error: unknown) => {
        log.error(error, 'Failed to initialize WASM parser factory');
    });

    return forcedWasmFactory;
}

export const parserFactory: ParserFactory = selectParserFactory();
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should this check whether we are on legacy Linux instead of checking an environment variable? I think this can just be a variable instead of a function.