Skip to content

Commit 161c965

Browse files
committed
Fallback to WASM tree-sitter parser
1 parent c99b042 commit 161c965

File tree

11 files changed

+1137
-13
lines changed

11 files changed

+1137
-13
lines changed

package-lock.json

Lines changed: 7 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,7 @@
9292
"pyodide": "0.28.2",
9393
"tree-sitter": "0.22.4",
9494
"tree-sitter-json": "0.24.8",
95+
"web-tree-sitter": "0.22.4",
9596
"ts-essentials": "10.1.1",
9697
"vscode-languageserver": "9.0.1",
9798
"vscode-languageserver-textdocument": "1.0.12",

src/context/syntaxtree/SyntaxTree.ts

Lines changed: 5 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,8 @@
1-
import YamlGrammar from '@tree-sitter-grammars/tree-sitter-yaml';
2-
import Parser, { Edit, Point, SyntaxNode, Tree, Language } from 'tree-sitter';
3-
import JsonGrammar from 'tree-sitter-json';
1+
import { Edit, Point, SyntaxNode, Tree } from 'tree-sitter';
42
import { Position } from 'vscode-languageserver-textdocument';
53
import { DocumentType } from '../../document/Document';
64
import { createEdit } from '../../document/DocumentUtils';
5+
import { parserFactory } from '../../parser/ParserFactory';
76
import { Measure } from '../../telemetry/TelemetryDecorator';
87
import { TopLevelSection, TopLevelSections, IntrinsicsSet } from '../ContextType';
98
import { normalizeIntrinsicFunction } from '../semantic/Intrinsics';
@@ -15,20 +14,13 @@ import { NodeType } from './utils/NodeType';
1514
import { createSyntheticNode } from './utils/SyntheticEntityFactory';
1615
import { CommonNodeTypes, JsonNodeTypes, YamlNodeTypes } from './utils/TreeSitterTypes';
1716

18-
// Optimization to only load the different language grammars once
19-
// Loading native/wasm code is expensive
20-
const JSON_PARSER = new Parser();
21-
JSON_PARSER.setLanguage(JsonGrammar as Language);
22-
23-
const YAML_PARSER = new Parser();
24-
YAML_PARSER.setLanguage(YamlGrammar as Language);
25-
2617
export type PropertyPath = ReadonlyArray<string | number>;
2718
export type PathAndEntity = {
2819
path: ReadonlyArray<SyntaxNode>; // All nodes from target to root
2920
propertyPath: PropertyPath; // Path like ["Resources", "MyBucket", "Properties"]
3021
entityRootNode?: SyntaxNode; // The complete entity definition (e.g., entire resource)
3122
};
23+
3224
const LARGE_NODE_TEXT_LIMIT = 200; // If a node's text is > 200 chars, we are likely not at the most specific node (indicating that it might be invalid)
3325

3426
export abstract class SyntaxTree {
@@ -42,9 +34,9 @@ export abstract class SyntaxTree {
4234
content: string,
4335
) {
4436
if (type === DocumentType.YAML) {
45-
this.parser = YAML_PARSER;
37+
this.parser = parserFactory.createYamlParser();
4638
} else {
47-
this.parser = JSON_PARSER;
39+
this.parser = parserFactory.createJsonParser();
4840
}
4941
this.rawContent = content;
5042
this.tree = this.parser.parse(this.rawContent);

src/parser/GrammarManager.ts

Lines changed: 136 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,136 @@
1+
import { join } from 'path';
2+
import Parser from 'web-tree-sitter';
3+
import { DocumentType } from '../document/Document';
4+
import { readBufferIfExists } from '../utils/File';
5+
6+
export interface GrammarConfig {
7+
yamlGrammarPath?: string;
8+
jsonGrammarPath?: string;
9+
maxRetries?: number;
10+
retryDelay?: number;
11+
wasmBasePath?: string;
12+
}
13+
14+
/**
 * Singleton that loads web-tree-sitter (WASM) grammars from disk and caches them
 * per document type. Concurrent requests for the same grammar share one load.
 */
export class GrammarManager {
    private static instance: GrammarManager;
    private initialized = false;
    private readonly grammarCache = new Map<DocumentType, Parser.Language>();
    private readonly loadingPromises = new Map<DocumentType, Promise<Parser.Language>>();
    private readonly config: Required<GrammarConfig>;

    private constructor(config: GrammarConfig = {}) {
        const basePath = config.wasmBasePath ?? this.getDefaultWasmPath();

        this.config = {
            yamlGrammarPath: config.yamlGrammarPath ?? join(basePath, 'tree-sitter-yaml.wasm'),
            jsonGrammarPath: config.jsonGrammarPath ?? join(basePath, 'tree-sitter-json.wasm'),
            maxRetries: config.maxRetries ?? 3,
            retryDelay: config.retryDelay ?? 100,
            wasmBasePath: basePath,
        };
    }

    /** Resolves the directory that is expected to contain the grammar WASM files. */
    private getDefaultWasmPath(): string {
        // In bundled environment, WASM files are in the same directory as the bundle
        if (typeof __dirname !== 'undefined') {
            // __dirname points to the bundle directory, WASM files are in ./wasm/
            return join(__dirname, 'wasm');
        }
        // Fallback for different environments
        return './wasm';
    }

    /**
     * Returns the shared manager, creating it on first use.
     *
     * @param config Only honoured by the call that creates the instance; later
     *               calls return the existing instance and ignore this argument.
     */
    public static getInstance(config?: GrammarConfig): GrammarManager {
        if (!GrammarManager.instance) {
            GrammarManager.instance = new GrammarManager(config);
        }
        return GrammarManager.instance;
    }

    /**
     * Initializes the web-tree-sitter runtime once, pointing it at the core
     * `tree-sitter.wasm` located one directory above the grammar directory.
     */
    private async ensureInitialized(): Promise<void> {
        if (this.initialized) return;

        // NOTE(review): two loads started before the first init resolves will both
        // reach this call; web-tree-sitter appears to memoize init - confirm.
        await Parser.init({
            locateFile: (scriptName: string) => {
                if (scriptName === 'tree-sitter.wasm') {
                    return join(this.config.wasmBasePath, '..', 'tree-sitter.wasm');
                }
                return scriptName;
            },
        });

        this.initialized = true;
    }

    /**
     * Reads and loads the grammar for `type`, retrying with a linearly growing
     * delay (`retryDelay * attempt`) between failed attempts.
     *
     * @throws Error when every attempt fails; the message carries the last failure.
     */
    private async loadGrammarWithRetry(type: DocumentType): Promise<Parser.Language> {
        const grammarPath = this.getGrammarPath(type);
        // Always try at least once, even when maxRetries is configured as 0 or negative.
        const maxAttempts = Math.max(1, this.config.maxRetries);

        let lastError: Error | undefined;

        for (let attempt = 1; attempt <= maxAttempts; attempt++) {
            try {
                const wasmBuffer = readBufferIfExists(grammarPath);
                return await Parser.Language.load(wasmBuffer);
            } catch (error: unknown) {
                // Thrown values are not guaranteed to be Error instances; normalize
                // so the final message never reads "undefined".
                lastError = error instanceof Error ? error : new Error(String(error));

                if (attempt < maxAttempts) {
                    await new Promise((resolve) => setTimeout(resolve, this.config.retryDelay * attempt));
                }
            }
        }

        throw new Error(`Failed to load ${type} grammar after ${maxAttempts} attempts: ${lastError?.message}`);
    }

    /**
     * Returns the grammar for `type`, loading it on first request.
     * A cached grammar is returned immediately, and callers arriving while a load
     * is in flight await that same load instead of starting another.
     *
     * @throws Error when the grammar cannot be loaded.
     */
    public async loadGrammar(type: DocumentType): Promise<Parser.Language> {
        // Return cached grammar if available
        const cached = this.grammarCache.get(type);
        if (cached) {
            return cached;
        }

        // Return existing loading promise if in progress
        const existingPromise = this.loadingPromises.get(type);
        if (existingPromise) {
            return await existingPromise;
        }

        // Start new loading process
        const loadingPromise = this.loadGrammarInternal(type);
        this.loadingPromises.set(type, loadingPromise);

        try {
            const grammar = await loadingPromise;
            this.grammarCache.set(type, grammar);
            return grammar;
        } finally {
            // Only remove our own entry: clearCache() followed by a newer load may
            // have registered a different promise for this type in the meantime.
            if (this.loadingPromises.get(type) === loadingPromise) {
                this.loadingPromises.delete(type);
            }
        }
    }

    /** Ensures the WASM runtime is ready, then loads the grammar with retries. */
    private async loadGrammarInternal(type: DocumentType): Promise<Parser.Language> {
        await this.ensureInitialized();
        return await this.loadGrammarWithRetry(type);
    }

    /**
     * Loads the given grammars concurrently (YAML and JSON by default).
     * Rejects if any one of them fails to load.
     */
    public async preloadGrammars(types: DocumentType[] = [DocumentType.YAML, DocumentType.JSON]): Promise<void> {
        const promises = types.map((type) => this.loadGrammar(type));
        await Promise.all(promises);
    }

    /** Reports whether the grammar for `type` is already in the cache. */
    public isGrammarLoaded(type: DocumentType): boolean {
        return this.grammarCache.has(type);
    }

    /** Drops all cached grammars and forgets any in-flight load bookkeeping. */
    public clearCache(): void {
        this.grammarCache.clear();
        this.loadingPromises.clear();
    }

    /** Returns the configured WASM path for `type`; any non-YAML type maps to the JSON grammar. */
    public getGrammarPath(type: DocumentType): string {
        return type === DocumentType.YAML ? this.config.yamlGrammarPath : this.config.jsonGrammarPath;
    }
}

src/parser/ParserFactory.ts

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
import TreeSitterYaml from '@tree-sitter-grammars/tree-sitter-yaml';
2+
import Parser from 'tree-sitter';
3+
import TreeSitterJson from 'tree-sitter-json';
4+
import { LoggerFactory } from '../telemetry/LoggerFactory';
5+
import { WasmParserFactory } from './WasmParserFactory';
6+
7+
const log = LoggerFactory.getLogger('ParserFactory');
8+
9+
export interface ParserFactory {
10+
createYamlParser(): Parser;
11+
createJsonParser(): Parser;
12+
initialize?(): Promise<void>;
13+
}
14+
15+
/**
 * Parser factory backed by the native tree-sitter bindings.
 * If the native parsers cannot be set up, it switches to a `WasmParserFactory`
 * whose initialization is started in the background.
 */
class NativeParserFactory implements ParserFactory {
    // Left undefined when native setup fails; the WASM fallback serves requests then.
    private readonly yamlParser?: Parser;
    private readonly jsonParser?: Parser;
    private wasmFallback?: WasmParserFactory;
    private readonly nativeFailed: boolean = false;

    constructor() {
        try {
            const yamlParser = new Parser();
            yamlParser.setLanguage(TreeSitterYaml as unknown as Parser.Language);

            const jsonParser = new Parser();
            jsonParser.setLanguage(TreeSitterJson as unknown as Parser.Language);

            // Publish both parsers only after both are fully configured.
            this.yamlParser = yamlParser;
            this.jsonParser = jsonParser;

            log.info('Native tree-sitter parsers initialized successfully');
        } catch (error: unknown) {
            // Keep the cause in the log, and do not construct native parsers again:
            // if `new Parser()` was what failed, repeating it would throw out of the
            // constructor and prevent the fallback from ever being installed.
            log.error(error, 'Native tree-sitter initialization failed, will use WASM fallback');
            this.nativeFailed = true;
            this.initializeWasmFallback();
        }
    }

    /** Creates the WASM factory and starts its initialization without awaiting it. */
    private initializeWasmFallback(): void {
        log.info('Initializing WASM fallback...');
        this.wasmFallback = new WasmParserFactory();
        this.wasmFallback.initialize().catch((error: unknown) => {
            log.error(error, 'WASM fallback initialization failed');
        });
    }

    /**
     * Returns the native YAML parser, or the fallback's parser when native setup failed.
     *
     * @throws Error if no YAML parser is available from either backend.
     */
    createYamlParser(): Parser {
        if (this.nativeFailed && this.wasmFallback) {
            return this.wasmFallback.createYamlParser();
        }
        if (!this.yamlParser) {
            throw new Error('YAML parser is unavailable: native initialization failed and no WASM fallback exists');
        }
        return this.yamlParser;
    }

    /**
     * Returns the native JSON parser, or the fallback's parser when native setup failed.
     *
     * @throws Error if no JSON parser is available from either backend.
     */
    createJsonParser(): Parser {
        if (this.nativeFailed && this.wasmFallback) {
            return this.wasmFallback.createJsonParser();
        }
        if (!this.jsonParser) {
            throw new Error('JSON parser is unavailable: native initialization failed and no WASM fallback exists');
        }
        return this.jsonParser;
    }
}
61+
62+
// Environment detection and factory creation
63+
/**
 * Tells whether the WASM implementation was explicitly requested.
 *
 * @returns `true` only when `CLOUDFORMATIONLSP_USE_WASM` is exactly the string `'true'`.
 */
function shouldForceWasm(): boolean {
    const flag = process.env.CLOUDFORMATIONLSP_USE_WASM;
    return flag === 'true';
}
66+
67+
// Choose the parser factory once, at module load. When WASM is forced, its
// initialization is kicked off here but not awaited, so it finishes in the background.
const wasmForced = shouldForceWasm();
let factoryInstance: ParserFactory;

if (wasmForced) {
    log.info('Forcing WASM tree-sitter implementation (CLOUDFORMATIONLSP_USE_WASM=true)');
    const wasmOnlyFactory = new WasmParserFactory();
    factoryInstance = wasmOnlyFactory;
    // eslint-disable-next-line unicorn/prefer-top-level-await
    wasmOnlyFactory.initialize().catch((reason: unknown) => {
        log.error(reason, 'Failed to initialize WASM parser factory');
    });
} else {
    log.info('Using native tree-sitter implementation with WASM fallback');
    factoryInstance = new NativeParserFactory();
}

/** Shared parser factory used by the rest of the server. */
export const parserFactory: ParserFactory = factoryInstance;

0 commit comments

Comments
 (0)