Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/commands/scan/fetch-supported-scan-file-names.mts
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@ export async function fetchSupportedScanFileNames(
): Promise<CResult<SocketSdkSuccessResult<'getReportSupportedFiles'>['data']>> {
const {
sdkOpts,
spinner,
silence = false,
spinner,
} = {
__proto__: null,
...options,
Expand Down
2 changes: 1 addition & 1 deletion src/utils/api.mts
Original file line number Diff line number Diff line change
Expand Up @@ -138,8 +138,8 @@ export async function handleApiCall<T extends SocketSdkOperations>(
const {
commandPath,
description,
spinner,
silence = false,
spinner,
} = {
__proto__: null,
...options,
Expand Down
46 changes: 35 additions & 11 deletions src/utils/glob.mts
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,14 @@ export function filterBySupportedScanFiles(
return filepaths.filter(p => micromatch.some(p, patterns, { dot: true }))
}

/**
 * Builds a reusable predicate that reports whether a file path matches any of
 * the supported scan file patterns.
 *
 * The glob patterns are derived from `supportedFiles` once, when the filter is
 * created, so the returned predicate can be invoked per path without
 * recomputing them.
 *
 * @param supportedFiles Supported-files data the glob patterns are derived from.
 * @returns A predicate that is `true` when `filepath` matches at least one
 *   pattern (matched with micromatch's `dot` option enabled).
 */
export function createSupportedFilesFilter(
  supportedFiles: SocketSdkSuccessResult<'getReportSupportedFiles'>['data'],
): (filepath: string) => boolean {
  const supportedPatterns = getSupportedFilePatterns(supportedFiles)
  const isSupportedFile = (filepath: string): boolean => {
    const matchesAnyPattern = micromatch.some(filepath, supportedPatterns, {
      dot: true,
    })
    return matchesAnyPattern
  }
  return isSupportedFile
}

export function getSupportedFilePatterns(
supportedFiles: SocketSdkSuccessResult<'getReportSupportedFiles'>['data'],
): string[] {
Expand All @@ -178,6 +186,10 @@ export function getSupportedFilePatterns(
}

// Options accepted by globWithGitIgnore(): every GlobOptions field plus the
// extra fields declared below.
type GlobWithGitIgnoreOptions = GlobOptions & {
// Optional predicate applied to each path while glob results are streamed.
// When provided, only paths for which it returns true are accumulated.
// Filtering early (instead of collecting everything and filtering afterwards)
// is critical for memory efficiency when scanning large monorepos.
filter?: ((filepath: string) => boolean) | undefined
// Optional Socket configuration object.
// NOTE(review): presumably the parsed socket.yml; how it is consumed is not
// visible in this excerpt.
socketConfig?: SocketYml | undefined
}

Expand All @@ -187,6 +199,7 @@ export async function globWithGitIgnore(
): Promise<string[]> {
const {
cwd = process.cwd(),
filter,
socketConfig,
...additionalOptions
} = { __proto__: null, ...options } as GlobWithGitIgnoreOptions
Expand Down Expand Up @@ -243,27 +256,38 @@ export async function globWithGitIgnore(
...additionalOptions,
} as GlobOptions

if (!hasNegatedPattern) {
// When no filter is provided and no negated patterns exist, use the fast path.
if (!hasNegatedPattern && !filter) {
return await fastGlob.glob(patterns as string[], globOptions)
}

// Add support for negated "ignore" patterns which many globbing libraries,
// including 'fast-glob', 'globby', and 'tinyglobby', lack support for.
const filtered: string[] = []
const ig = ignore().add([...ignores])
// Use streaming to avoid unbounded memory accumulation.
// This is critical for large monorepos with 100k+ files.
const results: string[] = []
const ig = hasNegatedPattern ? ignore().add([...ignores]) : null
const stream = fastGlob.globStream(
patterns as string[],
globOptions,
) as AsyncIterable<string>
for await (const p of stream) {
// Note: the input files must be INSIDE the cwd. If you get strange looking
// relative path errors here, most likely your path is outside the given cwd.
const relPath = globOptions.absolute ? path.relative(cwd, p) : p
if (!ig.ignores(relPath)) {
filtered.push(p)
// Check gitignore patterns with negation support.
if (ig) {
// Note: the input files must be INSIDE the cwd. If you get strange looking
// relative path errors here, most likely your path is outside the given cwd.
const relPath = globOptions.absolute ? path.relative(cwd, p) : p
if (ig.ignores(relPath)) {
continue
}
}
// Apply the optional filter to reduce memory usage.
// When scanning large monorepos, this filters early (e.g., to manifest files only)
// instead of accumulating all 100k+ files and filtering later.
if (filter && !filter(p)) {
continue
}
results.push(p)
}
return filtered
return results
}

export async function globWorkspace(
Expand Down
252 changes: 252 additions & 0 deletions src/utils/glob.test.mts
Original file line number Diff line number Diff line change
@@ -0,0 +1,252 @@
import { existsSync, readdirSync, rmSync } from 'node:fs'
import path from 'node:path'
import { fileURLToPath } from 'node:url'

import mockFs from 'mock-fs'
import { afterEach, describe, expect, it } from 'vitest'

import { normalizePath } from '@socketsecurity/registry/lib/path'

import { NODE_MODULES } from '../constants.mjs'
import {
createSupportedFilesFilter,
globWithGitIgnore,
pathsToGlobPatterns,
} from './glob.mts'

import type FileSystem from 'mock-fs/lib/filesystem'

// Filter functions defined at module scope to satisfy linting rules.
/**
 * Test filter: accepts only paths that end with the `.json` suffix
 * (case-sensitive).
 */
function filterJsonFiles(filepath: string): boolean {
  const jsonSuffix = '.json'
  const isJson = filepath.endsWith(jsonSuffix)
  return isJson
}

/**
 * Test filter: accepts only paths that end with the `.ts` suffix
 * (case-sensitive).
 */
function filterTsFiles(filepath: string): boolean {
  const tsSuffix = '.ts'
  const isTs = filepath.endsWith(tsSuffix)
  return isTs
}

// This is an ES module (it reads import.meta.url), so __filename and __dirname
// are derived from the module URL.
const __filename = fileURLToPath(import.meta.url)
const __dirname = path.dirname(__filename)

// Directory named by NODE_MODULES, resolved two levels above this file's
// directory.
const rootNmPath = path.join(__dirname, '../..', NODE_MODULES)
// Normalized path of a "glob-mock" directory next to this file; the tests use
// it as the cwd and as the prefix of every mocked fixture file.
const mockFixturePath = normalizePath(path.join(__dirname, 'glob-mock'))
// Normalized form of rootNmPath; used as the key under which the loaded
// node_modules contents are mounted in the mocked filesystem.
const mockNmPath = normalizePath(rootNmPath)

/**
 * Recursively removes broken symbolic links beneath `dirPath` so that loading
 * the directory into mock-fs does not fail on them.
 *
 * A link counts as broken when the directory entry is a symbolic link and
 * `existsSync` reports that its path does not exist. Real subdirectories are
 * descended into. The cleanup is best-effort: a missing or unreadable
 * directory is skipped, and a failure on one entry does not stop the others.
 *
 * @param dirPath Directory to scan.
 */
function cleanupBrokenSymlinks(dirPath: string): void {
  try {
    const dirIsMissing = !existsSync(dirPath)
    if (dirIsMissing) {
      return
    }
    for (const dirent of readdirSync(dirPath, { withFileTypes: true })) {
      const entryPath = path.join(dirPath, dirent.name)
      try {
        const isDangling = dirent.isSymbolicLink() && !existsSync(entryPath)
        if (isDangling) {
          // The link is present but its target is not: delete the link itself.
          rmSync(entryPath, { force: true })
          continue
        }
        if (dirent.isDirectory()) {
          // Descend into real subdirectories.
          cleanupBrokenSymlinks(entryPath)
        }
      } catch {
        // Best-effort: a failure on one entry must not abort the others.
      }
    }
  } catch {
    // The directory could not be read, so there is nothing to clean up.
  }
}

// Clean up broken symlinks before loading node_modules.
// This runs once when the module is evaluated, before any test has called
// mockFs(), and it may delete broken symlinks under rootNmPath.
cleanupBrokenSymlinks(rootNmPath)

// Loads node_modules for mock-fs. If loading throws (for example because of
// broken symlinks or missing files), a warning is logged and an empty mock is
// used instead, so the test module can still be evaluated.
function loadNodeModulesForMock() {
  try {
    return mockFs.load(rootNmPath)
  } catch (e) {
    const reason = e instanceof Error ? e.message : String(e)
    console.warn(
      `Warning: Failed to load node_modules for mock-fs: ${reason}`,
    )
    return {}
  }
}

// Computed once at module load and shared by every mocked filesystem.
const mockedNmCallback = loadNodeModulesForMock()

/**
 * Installs a mocked filesystem containing the entries in `config` plus the
 * pre-loaded node_modules contents mounted at `mockNmPath`.
 *
 * The node_modules entry is spread last, so it wins if `config` uses the same
 * key.
 *
 * @param config Mock filesystem entries for the test.
 * @returns Whatever `mockFs()` returns.
 */
function mockTestFs(config: FileSystem.DirectoryItems) {
  const fsTree = {
    ...config,
    [mockNmPath]: mockedNmCallback,
  }
  return mockFs(fsTree)
}

describe('glob utilities', () => {
afterEach(() => {
mockFs.restore()
})

describe('globWithGitIgnore()', () => {
  // Baseline: no .gitignore and no filter.
  it('should find files matching glob patterns', async () => {
    mockTestFs({
      [`${mockFixturePath}/package.json`]: '{}',
      [`${mockFixturePath}/src/index.ts`]: '',
    })

    const matches = await globWithGitIgnore(['**/*.json'], {
      cwd: mockFixturePath,
    })
    const normalized = matches.map(normalizePath)

    expect(normalized).toEqual([`${mockFixturePath}/package.json`])
  })

  // Plain (non-negated) ignore rules exclude matching files.
  it('should respect .gitignore files', async () => {
    mockTestFs({
      [`${mockFixturePath}/.gitignore`]: 'ignored/**',
      [`${mockFixturePath}/package.json`]: '{}',
      [`${mockFixturePath}/ignored/package.json`]: '{}',
      [`${mockFixturePath}/included/package.json`]: '{}',
    })

    const matches = await globWithGitIgnore(['**/*.json'], {
      cwd: mockFixturePath,
    })
    const normalizedSorted = matches.map(normalizePath).sort()

    expect(normalizedSorted).toEqual([
      `${mockFixturePath}/included/package.json`,
      `${mockFixturePath}/package.json`,
    ])
  })

  // A "!" rule re-includes a file that an earlier rule ignored.
  it('should handle negated patterns in .gitignore', async () => {
    mockTestFs({
      [`${mockFixturePath}/.gitignore`]: 'ignored/**\n!ignored/keep.json',
      [`${mockFixturePath}/package.json`]: '{}',
      [`${mockFixturePath}/ignored/excluded.json`]: '{}',
      [`${mockFixturePath}/ignored/keep.json`]: '{}',
    })

    const matches = await globWithGitIgnore(['**/*.json'], {
      cwd: mockFixturePath,
    })
    const normalizedSorted = matches.map(normalizePath).sort()

    // keep.json is re-included by the negated rule; excluded.json stays ignored.
    expect(normalizedSorted).toEqual([
      `${mockFixturePath}/ignored/keep.json`,
      `${mockFixturePath}/package.json`,
    ])
  })

  it('should apply filter function during streaming to reduce memory', async () => {
    const fileCount = 100
    // The .gitignore holds a negated rule so the streaming path is exercised.
    const tree: FileSystem.DirectoryItems = {
      [`${mockFixturePath}/.gitignore`]: 'temp/\n!temp/keep.json',
    }
    // One .txt and one .json file per index; only the .json half should survive.
    for (let index = 0; index < fileCount; index += 1) {
      const stem = `${mockFixturePath}/file${index}`
      tree[`${stem}.txt`] = 'content'
      tree[`${stem}.json`] = '{}'
    }
    mockTestFs(tree)

    const matches = await globWithGitIgnore(['**/*'], {
      cwd: mockFixturePath,
      filter: filterJsonFiles,
    })

    // Exactly the 100 .json files, and nothing else.
    expect(matches).toHaveLength(fileCount)
    expect(matches.every(filterJsonFiles)).toBe(true)
  })

  // The filter must also be honored when there are no negated ignore rules.
  it('should apply filter without negated patterns', async () => {
    mockTestFs({
      [`${mockFixturePath}/package.json`]: '{}',
      [`${mockFixturePath}/src/index.ts`]: '',
      [`${mockFixturePath}/src/utils.ts`]: '',
      [`${mockFixturePath}/readme.md`]: '',
    })

    const matches = await globWithGitIgnore(['**/*'], {
      cwd: mockFixturePath,
      filter: filterTsFiles,
    })
    const normalizedSorted = matches.map(normalizePath).sort()

    expect(normalizedSorted).toEqual([
      `${mockFixturePath}/src/index.ts`,
      `${mockFixturePath}/src/utils.ts`,
    ])
  })

  it('should combine filter with negated gitignore patterns', async () => {
    mockTestFs({
      [`${mockFixturePath}/.gitignore`]: 'build/**\n!build/manifest.json',
      [`${mockFixturePath}/package.json`]: '{}',
      [`${mockFixturePath}/src/index.ts`]: '',
      [`${mockFixturePath}/build/output.js`]: '',
      [`${mockFixturePath}/build/manifest.json`]: '{}',
    })

    const matches = await globWithGitIgnore(['**/*'], {
      cwd: mockFixturePath,
      filter: filterJsonFiles,
    })
    const normalizedSorted = matches.map(normalizePath).sort()

    // package.json and the re-included build/manifest.json pass both checks;
    // build/output.js is ignored and src/index.ts is not a .json file.
    expect(normalizedSorted).toEqual([
      `${mockFixturePath}/build/manifest.json`,
      `${mockFixturePath}/package.json`,
    ])
  })
})

describe('createSupportedFilesFilter()', () => {
  it('should create a filter function matching supported file patterns', () => {
    const supportedFiles = {
      npm: {
        packagejson: { pattern: 'package.json' },
        packagelockjson: { pattern: 'package-lock.json' },
      },
    }
    const isSupported = createSupportedFilesFilter(supportedFiles)

    // Each row is [path, whether the filter should accept it].
    const expectations: Array<[string, boolean]> = [
      ['/path/to/package.json', true],
      ['/path/to/package-lock.json', true],
      ['/path/to/random.txt', false],
      ['/path/to/nested/package.json', true],
    ]
    for (const [filepath, expected] of expectations) {
      expect(isSupported(filepath)).toBe(expected)
    }
  })
})

describe('pathsToGlobPatterns()', () => {
  it('should convert "." to "**/*"', () => {
    // Both spellings of the current directory expand to the match-all glob.
    const matchAll = ['**/*']
    expect(pathsToGlobPatterns(['.'])).toEqual(matchAll)
    expect(pathsToGlobPatterns(['./'])).toEqual(matchAll)
  })

  it('should append "/**/*" to directory paths', () => {
    // The directory must exist in the mocked filesystem, because the function
    // checks whether the path is a directory (using isDirSync).
    const subdirPath = `${mockFixturePath}/subdir`
    mockTestFs({
      [subdirPath]: {
        'file.txt': '',
      },
    })

    const patterns = pathsToGlobPatterns(['subdir'], mockFixturePath)

    expect(patterns).toEqual(['subdir/**/*'])
  })

  it('should keep file paths unchanged', () => {
    const filePath = `${mockFixturePath}/file.txt`
    mockTestFs({
      [filePath]: '',
    })

    const patterns = pathsToGlobPatterns(['file.txt'], mockFixturePath)

    expect(patterns).toEqual(['file.txt'])
  })
})
})
12 changes: 8 additions & 4 deletions src/utils/path-resolve.mts
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ import { isDirSync } from '@socketsecurity/registry/lib/fs'

import constants, { NODE_MODULES, NPM } from '../constants.mts'
import {
filterBySupportedScanFiles,
createSupportedFilesFilter,
globWithGitIgnore,
pathsToGlobPatterns,
} from './glob.mts'
Expand Down Expand Up @@ -114,13 +114,17 @@ export async function getPackageFilesForScan(
...options,
} as PackageFilesForScanOptions

const filepaths = await globWithGitIgnore(
// Apply the supported files filter during streaming to avoid accumulating
// all files in memory. This is critical for large monorepos with 100k+ files
// where accumulating all paths before filtering causes OOM errors.
const filter = createSupportedFilesFilter(supportedFiles)

return await globWithGitIgnore(
pathsToGlobPatterns(inputPaths, options?.cwd),
{
cwd,
filter,
socketConfig,
},
)

return filterBySupportedScanFiles(filepaths!, supportedFiles)
}
Loading