diff --git a/.codespell-ignore-words.txt b/.codespell-ignore-words.txt new file mode 100644 index 0000000000..e7f296d73d --- /dev/null +++ b/.codespell-ignore-words.txt @@ -0,0 +1,268 @@ +# ============================================================================= +# ScanCode Toolkit - Custom Dictionary for codespell +# ============================================================================= +# Project-specific terms, technical words, and proper nouns that should not +# be flagged as spelling mistakes. One word per line. +# Lines starting with # are comments. +# ============================================================================= + +# ----------------------------------------------------------------------------- +# Project Names and Brands +# ----------------------------------------------------------------------------- +scancode +ScanCode +aboutcode +AboutCode +nexb +nexB +packageurl +PackageURL +dejacode +DejaCode +clearcode +ClearCode +vulnerablecode +VulnerableCode +purldb +PurlDB +fetchcode +FetchCode +scanpipe +ScanPipe +matchcode +aboutfiles + +# ----------------------------------------------------------------------------- +# ScanCode Internal Module Names +# ----------------------------------------------------------------------------- +licensedcode +packagedcode +cluecode +extractcode +textcode +formattedcode +summarycode +plugincode +commoncode +typecode +scancode_config + +# ----------------------------------------------------------------------------- +# Dependencies and Libraries +# ----------------------------------------------------------------------------- +saneyaml +pygmars +intbitset +pluggy +jaraco +jinja +lxml +pdfminer +pefile +pkginfo +pymaven +pyahocorasick +chardet +ftfy +jsonstreams +markupsafe +beautifulsoup +dparse +gemfileparser +javaproperties +normality +packvers +publicsuffix +urlpy +xmltodict +tomli +colorama +fasteners +cyseq +multiregex + +# ----------------------------------------------------------------------------- 
+# Package Managers and Ecosystems +# ----------------------------------------------------------------------------- +pypi +PyPI +nuget +NuGet +rubygems +RubyGems +packagist +cocoapods +CocoaPods +cpan +CPAN +cran +CRAN +opam +conda +hackage +bitbucket +golang +npmjs +crates +chocolatey +sourceforge +launchpad + +# ----------------------------------------------------------------------------- +# License and Legal Terms +# ----------------------------------------------------------------------------- +copyleft +sublicense +sublicensable +relicensing +licensee +licensor +spdx +SPDX +cyclonedx +CycloneDX +sbom +SBOM +purl + +# ----------------------------------------------------------------------------- +# File Formats and Extensions +# ----------------------------------------------------------------------------- +restructuredtext +reStructuredText +pyc +pyo +sdist +bdist +whl +gemfile +Gemfile +gemspec +podspec +cabal +csproj +pom + +# ----------------------------------------------------------------------------- +# Development and Build Tools +# ----------------------------------------------------------------------------- +virtualenv +venv +pytest +xdist +sphinx +setuptools +setuptools_scm +ruff +isort +pycodestyle +flake8 +autopep +doc8 +readthedocs +codecov +tox +makefile +dockerfile +editorconfig +twine +vendorize +autobuild + +# ----------------------------------------------------------------------------- +# ScanCode-Specific Terms +# ----------------------------------------------------------------------------- +codebase +datafile +datafiles +lockfile +subpackage +subpackages +fingerprinting +workbench +pre_scan +post_scan +scanpipe +reindex + +# ----------------------------------------------------------------------------- +# Technical Terms +# ----------------------------------------------------------------------------- +multiprocessing +multicore +preprocessing +preprocessed +serializable +deserialize +deserialized +unmapped +unprocessed +unicode +ascii +utf 
+commitish
+deque
+boolean
+namespace
+namespaces
+metadata
+hashable
+iterable
+refactor
+refactored
+pluggable
+encodings
+endianness
+walkable
+
+# -----------------------------------------------------------------------------
+# Abbreviations and Acronyms
+# -----------------------------------------------------------------------------
+cli
+CLI
+api
+API
+oss
+OSS
+vcs
+VCS
+uuid
+sha
+posix
+POSIX
+elf
+ELF
+dwarf
+DWARF
+rpm
+RPM
+deb
+apk
+bom
+eof
+EOF
+pe
+PE
+
+# -----------------------------------------------------------------------------
+# Variable Names and Code Identifiers (false positives)
+# -----------------------------------------------------------------------------
+te
+fo
+siz
+ro
+ws
+nd
+ened
+requestors
+IFF
+
+# -----------------------------------------------------------------------------
+# Other
+# -----------------------------------------------------------------------------
+nexb
+thead
+connexant
+Jupyter
+thirdparty
+re-used
diff --git a/.github/workflows/spell-check.yml b/.github/workflows/spell-check.yml
new file mode 100644
index 0000000000..e089c15289
--- /dev/null
+++ b/.github/workflows/spell-check.yml
@@ -0,0 +1,139 @@
+name: Spell Check
+
+on:
+  pull_request:
+    paths:
+      - '**/*.py'
+      - '**/*.rst'
+      - '**/*.md'
+      - '.codespell-ignore-words.txt'
+      - 'setup.cfg'
+
+permissions:
+  contents: read
+
+jobs:
+  spell-check:
+    name: Spell Check (Docs & Comments)
+    runs-on: ubuntu-24.04
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.12'
+
+      - name: Install codespell
+        run: pip install "codespell>=2.2.0"
+
+      - name: Get changed files
+        id: changed-files
+        env:
+          # Passed through the environment so that ref names are never
+          # interpolated into the script text itself.
+          EVENT_NAME: ${{ github.event_name }}
+          BASE_REF: ${{ github.base_ref }}
+        run: |
+          if [ "$EVENT_NAME" = "pull_request" ]; then
+            CHANGED_FILES=$(git diff --name-only --diff-filter=ACMRT \
+              "origin/${BASE_REF}...HEAD" 2>/dev/null \
+              | grep -E '\.(py|rst|md)$' || true)
+          else
+            CHANGED_FILES=$(git diff --name-only --diff-filter=ACMRT \
+              HEAD~1 HEAD 2>/dev/null \
+              | grep -E '\.(py|rst|md)$' || true)
+          fi
+
+          # Filter out directories that should be skipped
+          CHANGED_FILES=$(echo "$CHANGED_FILES" \
+            | grep -v -E '^(tests/|samples/|thirdparty/|src/licensedcode/data/rules/)' \
+            || true)
+
+          # Write one path per line and drop blank lines. An empty list must
+          # produce an empty file: a lone newline would be counted as one file.
+          printf '%s\n' "$CHANGED_FILES" | grep -v '^$' > changed_files.txt || true
+
+          FILE_COUNT=$(wc -l < changed_files.txt | tr -d ' ')
+          echo "file_count=$FILE_COUNT" >> "$GITHUB_OUTPUT"
+
+          echo "Files to check ($FILE_COUNT):"
+          if [ "$FILE_COUNT" -gt 0 ]; then
+            cat changed_files.txt
+          else
+            echo "(none)"
+          fi
+
+      - name: Run codespell on changed files
+        if: steps.changed-files.outputs.file_count != '0'
+        continue-on-error: true
+        env:
+          FILE_COUNT: ${{ steps.changed-files.outputs.file_count }}
+        run: |
+          {
+            echo "## Spell Check Results"
+            echo ""
+            echo "### Files Checked ($FILE_COUNT)"
+            echo "<details>"
+            echo "<summary>Click to see file list</summary>"
+            echo ""
+            echo '```'
+            cat changed_files.txt
+            echo '```'
+            echo "</details>"
+            echo ""
+          } >> "$GITHUB_STEP_SUMMARY"
+
+          # Run codespell on changed files
+          # codespell reads [codespell] config from setup.cfg automatically.
+          # xargs -d '\n' keeps paths containing spaces intact, and -r never
+          # runs codespell without arguments.
+          SPELL_STATUS=0
+          SPELL_OUTPUT=$(xargs -r -d '\n' codespell < changed_files.txt 2>&1) \
+            || SPELL_STATUS=$?
+
+          # Decide on the exit status: with "count =" in setup.cfg the
+          # captured output may be non-empty even when nothing was flagged.
+          if [ "$SPELL_STATUS" -ne 0 ]; then
+            # Count only "path:line: typo ==> fix" report lines
+            ISSUE_COUNT=$(printf '%s\n' "$SPELL_OUTPUT" | grep -c ' ==> ' || true)
+
+            {
+              echo "### Spelling Issues Found ($ISSUE_COUNT)"
+              echo ""
+              echo "<details>"
+              echo "<summary>Spell check output</summary>"
+              echo ""
+              echo '```'
+              echo "$SPELL_OUTPUT"
+              echo '```'
+              echo "</details>"
+              echo ""
+              echo "---"
+              echo ""
+              echo "### How to Fix"
+              echo ""
+              echo "1. **Fix the typo** — correct the spelling in your file"
+              echo "2. **Add to dictionary** — if it's a valid technical term, add it to \`.codespell-ignore-words.txt\`"
+              echo "3. **Inline ignore** — use \`codespell:ignore\` in a comment on the line"
+              echo ""
+              echo "> **Note:** This check is currently **non-blocking** (warning only)."
+            } >> "$GITHUB_STEP_SUMMARY"
+          else
+            {
+              echo "### No Spelling Issues Found"
+              echo ""
+              echo "All checked files passed spell checking."
+            } >> "$GITHUB_STEP_SUMMARY"
+          fi
+
+      - name: Skip message
+        if: steps.changed-files.outputs.file_count == '0'
+        run: |
+          echo "## Spell Check Results" >> "$GITHUB_STEP_SUMMARY"
+          echo "" >> "$GITHUB_STEP_SUMMARY"
+          echo "No documentation or source files were changed in this PR." >> "$GITHUB_STEP_SUMMARY"
diff --git a/setup.cfg b/setup.cfg
index 7c45f388fd..72a005c8e0 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -237,3 +237,11 @@ scancode_output =
     yaml = formattedcode.output_yaml:YamlOutput
     cyclonedx = formattedcode.output_cyclonedx:CycloneDxJsonOutput
     cyclonedx-xml = formattedcode.output_cyclonedx:CycloneDxXmlOutput
+
+
+[codespell]
+builtin = clear,rare,informal,code
+ignore-words = .codespell-ignore-words.txt
+skip = .git,venv,.venv,*.pyc,*.pyo,.cache,.eggs,*.egg-info,dist,build,_build,docs/_build,thirdparty,.tox,node_modules,tests,samples,src/licensedcode/data,src/textcode/data,src/cluecode/data,src/packagedcode/data,src/typecode/data,*.png,*.jpg,*.jpeg,*.gif,*.svg,*.pdf,*.ico,*.whl,*.tar.gz,*.zip,*.ttf,*.woff,*.woff2,*.eot,*.min.js,*.min.css
+quiet-level = 2
+count =